From: sage Date: Mon, 11 Jul 2005 07:19:16 +0000 (+0000) Subject: *** empty log message *** X-Git-Tag: v0.1~1939 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=4b9ac4b5de0e5c61fad4167b1af699482dc917de;p=ceph.git *** empty log message *** git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@442 29311d96-e01e-0410-9327-a35deaab8ce9 --- diff --git a/ceph/client/Buffercache.cc b/ceph/client/Buffercache.cc index 4fbb7915c275..38ea4e3e53b3 100644 --- a/ceph/client/Buffercache.cc +++ b/ceph/client/Buffercache.cc @@ -2,13 +2,13 @@ #include "include/config.h" #undef dout -#define dout(l) if (l<=g_conf.debug) cout << "client" << "." << pthread_self() << " " +#define dout(l) if (l<=g_conf.debug) cout << "client" << "." << pthread_self() << ".bc " // -- Bufferhead methods Bufferhead::Bufferhead(inodeno_t ino, off_t off, Buffercache *bc) : ref(0) { - dout(10) << "bc: new bufferhead ino: " << ino << " offset: " << off << endl; + dout(10) << "new bufferhead ino: " << ino << " offset: " << off << endl; this->ino = ino; offset = off; state = BUFHD_STATE_CLEAN; @@ -23,7 +23,7 @@ Bufferhead::Bufferhead(inodeno_t ino, off_t off, Buffercache *bc) : Bufferhead::~Bufferhead() { - dout(10) << "bc: destroying bufferhead ino: " << ino << " size: " << bl.length() << " offset: " << offset << endl; + dout(10) << "destroying bufferhead ino: " << ino << " size: " << bl.length() << " offset: " << offset << endl; assert(state == BUFHD_STATE_CLEAN); assert(ref == 0); assert(lru_is_expireable()); @@ -32,23 +32,23 @@ Bufferhead::~Bufferhead() bc->lru.lru_remove(this); // debug segmentation fault if (bl.buffers().empty()) { - dout(10) << "bc: bufferlist is empty" << endl; + dout(10) << "bufferlist is empty" << endl; #if 0 } else { for (list::iterator it = bl.buffers().begin(); it != bl.buffers().end(); it++) { - //dout(10) << "bc: bufferptr len: " << it->length() << " off: " << it->offset() << endl; - dout(10) << "bc: bufferptr: " << *it << endl; + //dout(10) << "bufferptr len: " << it->length() << " off: " << it->offset() << endl; + dout(10) << "bufferptr: " << *it << endl; } - dout(10) <<"bc: listed all bufferptrs" << endl; + dout(10) <<"listed all bufferptrs" << endl; #endif } } void Bufferhead::alloc_buffers(size_t size) { - dout(10) << "bc: allocating buffers size: " << size << endl; + dout(10) << "allocating buffers size: " << size << endl; while (size > 0) { if (size <= g_conf.client_bcache_alloc_maxsize) { size_t k = g_conf.client_bcache_alloc_minsize; @@ -57,17 +57,17 @@ void Bufferhead::alloc_buffers(size_t size) b->set_length(size); bl.push_back(b); bc->increase_size(size); - dout(10) << "bc: new buffer(" << asize << "), total: " << bl.length() << endl; + dout(10) << "new buffer(" << asize << "), total: " << bl.length() << endl; break; } buffer *b = new buffer(g_conf.client_bcache_alloc_maxsize); b->set_length(g_conf.client_bcache_alloc_maxsize); bl.push_back(b); - dout(10) << "bc: new buffer(" << g_conf.client_bcache_alloc_maxsize << "), total: " << bl.length() << endl; + dout(10) << "new buffer(" << g_conf.client_bcache_alloc_maxsize << "), total: " << bl.length() << endl; size -= g_conf.client_bcache_alloc_maxsize; bc->increase_size(g_conf.client_bcache_alloc_maxsize); } - dout(7) << "bc: allocated " << bl.buffers().size() << " buffers (" << bl.length() << " bytes) " << endl; + dout(7) << "allocated " << bl.buffers().size() << " buffers (" << bl.length() << " bytes) " << endl; } void Bufferhead::miss_start(size_t miss_len) @@ -83,7 +83,7 @@ void Bufferhead::miss_finish() assert(state == BUFHD_STATE_RX); state = BUFHD_STATE_CLEAN; bc->increase_size(bl.length()); - dout(6) << "bc: miss_finish: clean_size: " << bc->get_clean_size() << " dirty_size: " << bc->get_dirty_size() << " rx_size: " << bc->get_rx_size() << " tx_size: " << bc->get_tx_size() << " age: " << bc->dirty_buffers.get_age() << endl; + dout(6) << "miss_finish: clean_size: " << bc->get_clean_size() << " dirty_size: " << bc->get_dirty_size() << " rx_size: " << bc->get_rx_size() << " tx_size: " << bc->get_tx_size() << " age: " << bc->dirty_buffers.get_age() << endl; //assert(bl.length() == miss_len); wakeup_read_waiters(); wakeup_write_waiters(); @@ -92,13 +92,13 @@ void Bufferhead::miss_finish() void Bufferhead::dirty() { if (state == BUFHD_STATE_CLEAN) { - dout(6) << "bc: dirtying clean buffer size: " << bl.length() << endl; + dout(6) << "dirtying clean buffer size: " << bl.length() << endl; state = BUFHD_STATE_DIRTY; dirty_since = time(NULL); // start clock for dirty buffer here bc->lru.lru_touch(this); - dout(6) << "bc: dirty before: clean_size: " << bc->get_clean_size() << " dirty_size: " << bc->get_dirty_size() << " rx_size: " << bc->get_rx_size() << " tx_size: " << bc->get_tx_size() << " age: " << bc->dirty_buffers.get_age() << endl; + dout(6) << "dirty before: clean_size: " << bc->get_clean_size() << " dirty_size: " << bc->get_dirty_size() << " rx_size: " << bc->get_rx_size() << " tx_size: " << bc->get_tx_size() << " age: " << bc->dirty_buffers.get_age() << endl; bc->clean_to_dirty(bl.length()); - dout(6) << "bc: dirty after: clean_size: " << bc->get_clean_size() << " dirty_size: " << bc->get_dirty_size() << " rx_size: " << bc->get_rx_size() << " tx_size: " << bc->get_tx_size() << " age: " << bc->dirty_buffers.get_age() << endl; + dout(6) << "dirty after: clean_size: " << bc->get_clean_size() << " dirty_size: " << bc->get_dirty_size() << " rx_size: " << bc->get_rx_size() << " tx_size: " << bc->get_tx_size() << " age: " << bc->dirty_buffers.get_age() << endl; assert(!bc->dirty_buffers.exist(this)); bc->dirty_buffers.insert(this); get(); @@ -106,13 +106,13 @@ void Bufferhead::dirty() fc->dirty_buffers.insert(this); get(); } else { - dout(10) << "bc: dirtying dirty buffer size: " << bl.length() << endl; + dout(10) << "dirtying dirty buffer size: " << bl.length() << endl; } } void Bufferhead::dirtybuffers_erase() { - dout(10) << "bc: erase in dirtybuffers size: " << bl.length() << " in state " << state << endl; + dout(10) << "erase in dirtybuffers size: " << bl.length() << " in state " << state << endl; assert(bc->dirty_buffers.exist(this)); bc->dirty_buffers.erase(this); put(); @@ -123,27 +123,27 @@ void Bufferhead::dirtybuffers_erase() void Bufferhead::flush_start() { - dout(10) << "bc: flush_start" << endl; + dout(10) << "flush_start" << endl; assert(state == BUFHD_STATE_DIRTY); state = BUFHD_STATE_TX; dirtybuffers_erase(); assert(!bc->inflight_buffers.count(this)); bc->inflight_buffers.insert(this); bc->dirty_to_tx(bl.length()); - dout(6) << "bc: flush_start: clean_size: " << bc->get_clean_size() << " dirty_size: " << bc->get_dirty_size() << " rx_size: " << bc->get_rx_size() << " tx_size: " << bc->get_tx_size() << " age: " << bc->dirty_buffers.get_age() << endl; + dout(6) << "flush_start: clean_size: " << bc->get_clean_size() << " dirty_size: " << bc->get_dirty_size() << " rx_size: " << bc->get_rx_size() << " tx_size: " << bc->get_tx_size() << " age: " << bc->dirty_buffers.get_age() << endl; assert(!fc->inflight_buffers.count(this)); fc->inflight_buffers.insert(this); } void Bufferhead::flush_finish() { - dout(10) << "bc: flush_finish" << endl; + dout(10) << "flush_finish" << endl; assert(state == BUFHD_STATE_TX); state = BUFHD_STATE_CLEAN; assert(bc->inflight_buffers.count(this)); bc->inflight_buffers.erase(this); bc->tx_to_clean(bl.length()); - dout(6) << "bc: flush_finish: clean_size: " << bc->get_clean_size() << " dirty_size: " << bc->get_dirty_size() << " rx_size: " << bc->get_rx_size() << " tx_size: " << bc->get_tx_size() << " age: " << bc->dirty_buffers.get_age() << endl; + dout(6) << "flush_finish: clean_size: " << bc->get_clean_size() << " dirty_size: " << bc->get_dirty_size() << " rx_size: " << bc->get_rx_size() << " tx_size: " << bc->get_tx_size() << " age: " << bc->dirty_buffers.get_age() << endl; assert(fc->inflight_buffers.count(this)); fc->inflight_buffers.erase(this); wakeup_write_waiters(); // readers never wait on flushes @@ -151,9 +151,9 @@ void Bufferhead::flush_finish() void Bufferhead::claim_append(Bufferhead *other) { - dout(10) << "bc: claim_append old bl size: " << bl.buffers().size() << " length " << bl.length() << endl; + dout(10) << "claim_append old bl size: " << bl.buffers().size() << " length " << bl.length() << endl; bl.claim_append(other->bl); - dout(10) << "bc: claim_append new bl size: " << bl.buffers().size() << " length: " << bl.length() << endl; + dout(10) << "claim_append new bl size: " << bl.buffers().size() << " length: " << bl.length() << endl; // keep older time stamp if (other->dirty_since < dirty_since) dirty_since = other->dirty_since; other->bl.clear(); @@ -198,7 +198,7 @@ bool Dirtybuffers::exist(Bufferhead* bh) void Dirtybuffers::get_expired(time_t ttl, size_t left_dirty, set& to_flush) { - dout(6) << "bc: get_expired ttl: " << ttl << " left_dirty: " << left_dirty << endl; + dout(6) << "get_expired ttl: " << ttl << " left_dirty: " << left_dirty << endl; time_t now = time(NULL); for (multimap::iterator it = _dbufs.begin(); it != _dbufs.end(); @@ -208,7 +208,7 @@ void Dirtybuffers::get_expired(time_t ttl, size_t left_dirty, set& to_flush.insert(it->second); left_dirty -= it->second->bl.length(); } - dout(6) << "bc: get_expired to_flush.size(): " << to_flush.size() << endl; + dout(6) << "get_expired to_flush.size(): " << to_flush.size() << endl; } // -- Filecache methods @@ -216,22 +216,22 @@ void Dirtybuffers::get_expired(time_t ttl, size_t left_dirty, set& map::iterator Filecache::overlap(size_t len, off_t off) { // returns iterator to buffer overlapping specified extent or end() if no overlap exists - dout(7) << "bc: overlap " << len << " " << off << endl; + dout(7) << "overlap " << len << " " << off << endl; map::iterator it = buffer_map.lower_bound(off); if (it == buffer_map.end() || it->first < off + len) { - dout(10) << "bc: overlap -- either no lower bound or overlap found" << endl; + dout(10) << "overlap -- either no lower bound or overlap found" << endl; return it; } else if (it == buffer_map.begin()) { - dout(10) << "bc: overlap -- extent is below where buffer_map begins" << endl; + dout(10) << "overlap -- extent is below where buffer_map begins" << endl; return buffer_map.end(); } else { - dout(10) << "bc: overlap -- examining previous buffer" << endl; + dout(10) << "overlap -- examining previous buffer" << endl; it--; if (it->first + it->second->bl.length() > off) { - dout(10) << "bc: overlap -- found overlap with previous buffer" << endl; + dout(10) << "overlap -- found overlap with previous buffer" << endl; return it; } else { - dout(10) << "bc: overlap -- no overlap with previous buffer" << endl; + dout(10) << "overlap -- no overlap with previous buffer" << endl; return buffer_map.end(); } } @@ -245,42 +245,42 @@ Filecache::map_existing(size_t len, map& tx, map& holes) { - dout(7) << "bc: map_existing len: " << len << " off: " << start_off << endl; + dout(7) << "map_existing len: " << len << " off: " << start_off << endl; off_t need_off = start_off; off_t actual_off = start_off; map::iterator existing, rvalue = overlap(len, start_off); for (existing = rvalue; existing != buffer_map.end() && existing->first < start_off + len; existing++) { - dout(7) << "bc: map: found overlap at offset " << actual_off << endl; + dout(7) << "map: found overlap at offset " << actual_off << endl; actual_off = existing->first; Bufferhead *bh = existing->second; if (actual_off > need_off) { holes[need_off] = (size_t) (actual_off - need_off); - dout(10) << "bc: map: hole " << need_off << " " << holes[need_off] << endl; + dout(10) << "map: hole " << need_off << " " << holes[need_off] << endl; } if (bh->state == BUFHD_STATE_RX) { rx[actual_off] = bh; - dout(10) << "bc: map: rx " << actual_off << " " << rx[actual_off]->miss_len << endl; + dout(10) << "map: rx " << actual_off << " " << rx[actual_off]->miss_len << endl; } else if (bh->state == BUFHD_STATE_TX) { tx[actual_off] = bh; - dout(10) << "bc: map: tx " << actual_off << " " << tx[actual_off]->bl.length() << endl; + dout(10) << "map: tx " << actual_off << " " << tx[actual_off]->bl.length() << endl; } else { hits[actual_off] = bh; - dout(10) << "bc: map: hits " << actual_off << " " << hits[actual_off]->bl.length() << endl; + dout(10) << "map: hits " << actual_off << " " << hits[actual_off]->bl.length() << endl; } need_off = actual_off + bh->bl.length(); } if (need_off < actual_off + len) { holes[need_off] = (size_t) (actual_off + len - need_off); - dout(10) << "bc: map: hole " << need_off << " " << holes[need_off] << endl; + dout(10) << "map: hole " << need_off << " " << holes[need_off] << endl; } return rvalue; } void Filecache::simplify() { - dout(7) << "bc: simplify" << endl; + dout(7) << "simplify" << endl; list removed; map::iterator start, next; start = buffer_map.begin(); @@ -295,7 +295,7 @@ void Filecache::simplify() start->second->offset + start->second->bl.length() == next->second->offset && next->second->read_waiters.empty() && next->second->write_waiters.empty()) { - dout(10) << "bc: simplify start: " << start->first << " next: " << next->first << endl; + dout(10) << "simplify start: " << start->first << " next: " << next->first << endl; Bufferhead *bh = next->second; start->second->claim_append(bh); if (bh->state == BUFHD_STATE_DIRTY) { @@ -307,14 +307,14 @@ void Filecache::simplify() next++; } if (next != buffer_map.end()) { - dout(10) << "bc: simplify failed, start state: " << start->second->state << " next state: " << next->second->state << endl; - dout(10) << "bc: simplify failed, start offset + len " << start->second->offset + start->second->bl.length() << " next offset: " << next->second->offset << endl; - dout(10) << "bc: simplify failed, " << next->second->read_waiters.size() << " read waiters" << endl; - dout(10) << "bc: simplify failed, " << next->second->write_waiters.size() << " write waiters" << endl; + dout(10) << "simplify failed, start state: " << start->second->state << " next state: " << next->second->state << endl; + dout(10) << "simplify failed, start offset + len " << start->second->offset + start->second->bl.length() << " next offset: " << next->second->offset << endl; + dout(10) << "simplify failed, " << next->second->read_waiters.size() << " read waiters" << endl; + dout(10) << "simplify failed, " << next->second->write_waiters.size() << " write waiters" << endl; } start = next; } - dout(7) << "bc: simplified " << count << " buffers" << endl; + dout(7) << "simplified " << count << " buffers" << endl; for (list::iterator it = removed.begin(); it != removed.end(); it++) { @@ -326,7 +326,7 @@ void Filecache::simplify() int Filecache::copy_out(size_t size, off_t offset, char *dst) { - dout(7) << "bc: copy_out size: " << size << " offset: " << offset << endl; + dout(7) << "copy_out size: " << size << " offset: " << offset << endl; assert(offset >= 0); assert(offset + size <= length()); int rvalue = size; @@ -336,21 +336,21 @@ int Filecache::copy_out(size_t size, off_t offset, char *dst) return -1; } offset -= curbuf->first; - if (offset < 0) dout(10) << "bc: copy_out: curbuf offset: " << curbuf->first << endl; + if (offset < 0) dout(10) << "copy_out: curbuf offset: " << curbuf->first << endl; assert(offset >= 0); while (size > 0) { Bufferhead *bh = curbuf->second; if (offset + size <= bh->bl.length()) { - dout(10) << "bc: copy_out bh len: " << bh->bl.length() << endl; - dout(10) << "bc: want to copy off: " << offset << " size: " << size << endl; + dout(10) << "copy_out bh len: " << bh->bl.length() << endl; + dout(10) << "want to copy off: " << offset << " size: " << size << endl; bh->bl.copy(offset, size, dst); break; } int howmuch = bh->bl.length() - offset; - dout(10) << "bc: copy_out bh len: " << bh->bl.length() << endl; - dout(10) << "bc: want to copy off: " << offset << " size: " << howmuch << endl; + dout(10) << "copy_out bh len: " << bh->bl.length() << endl; + dout(10) << "want to copy off: " << offset << " size: " << howmuch << endl; bh->bl.copy(offset, howmuch, dst); dst += howmuch; @@ -366,7 +366,7 @@ int Filecache::copy_out(size_t size, off_t offset, char *dst) void Buffercache::dirty(inodeno_t ino, size_t size, off_t offset, const char *src) { - dout(6) << "bc: dirty ino: " << ino << " size: " << size << " offset: " << offset << endl; + dout(6) << "dirty ino: " << ino << " size: " << size << " offset: " << offset << endl; assert(bcache_map.count(ino)); // filecache has to be already allocated!! Filecache *fc = get_fc(ino); assert(offset >= 0); @@ -397,7 +397,7 @@ void Buffercache::dirty(inodeno_t ino, size_t size, off_t offset, const char *sr int Buffercache::touch_continuous(map& hits, size_t size, off_t offset) { - dout(7) << "bc: touch_continuous size: " << size << " offset: " << offset << endl; + dout(7) << "touch_continuous size: " << size << " offset: " << offset << endl; off_t next_off = offset; for (map::iterator curbuf = hits.begin(); curbuf != hits.end(); @@ -418,7 +418,7 @@ void Buffercache::map_or_alloc(inodeno_t ino, size_t size, off_t offset, map& rx, map& tx) { - dout(7) << "bc: map_or_alloc len: " << size << " off: " << offset << endl; + dout(7) << "map_or_alloc len: " << size << " off: " << offset << endl; Filecache *fc = get_fc(ino); map holes; fc->map_existing(size, offset, buffers, rx, tx, holes); @@ -426,7 +426,7 @@ void Buffercache::map_or_alloc(inodeno_t ino, size_t size, off_t offset, for (map::iterator hole = holes.begin(); hole != holes.end(); hole++) { - dout(10) << "bc: allocate hole " << hole->first << " " << hole->second << endl; + dout(10) << "allocate hole " << hole->first << " " << hole->second << endl; assert(buffers.count(hole->first) == 0); Bufferhead *bh = new Bufferhead(ino, hole->first, this); buffers[hole->first] = bh; @@ -438,7 +438,7 @@ void Buffercache::map_or_alloc(inodeno_t ino, size_t size, off_t offset, void Buffercache::release_file(inodeno_t ino) { - dout(7) << "bc: release_file ino: " << ino << endl; + dout(7) << "release_file ino: " << ino << endl; assert(bcache_map.count(ino)); Filecache *fc = bcache_map[ino]; for (map::iterator it = fc->buffer_map.begin(); @@ -447,7 +447,7 @@ void Buffercache::release_file(inodeno_t ino) decrease_size(it->second->bl.length()); - dout(6) << "bc: release_file: clean_size: " << get_clean_size() << " dirty_size: " << get_dirty_size() << " rx_size: " << get_rx_size() << " tx_size: " << get_tx_size() << " age: " << dirty_buffers.get_age() << endl; + dout(6) << "release_file: clean_size: " << get_clean_size() << " dirty_size: " << get_dirty_size() << " rx_size: " << get_rx_size() << " tx_size: " << get_tx_size() << " age: " << dirty_buffers.get_age() << endl; assert(clean_size >= 0); delete it->second; } @@ -471,22 +471,22 @@ void Buffercache::get_reclaimable(size_t min_size, list& reclaimed) size_t Buffercache::reclaim(size_t min_size) { - dout(7) << "bc: reclaim min_size: " << min_size << endl; + dout(7) << "reclaim min_size: " << min_size << endl; size_t freed_size = 0; while (freed_size <= min_size) { Bufferhead *bh = (Bufferhead*)lru.lru_expire(); if (!bh) { - dout(6) << "bc: nothing more to reclaim -- freed_size: " << freed_size << endl; + dout(6) << "nothing more to reclaim -- freed_size: " << freed_size << endl; assert(0); break; // nothing more to reclaim } else { - dout(6) << "bc: reclaim: offset: " << bh->offset << " len: " << bh->bl.length() << endl; + dout(6) << "reclaim: offset: " << bh->offset << " len: " << bh->bl.length() << endl; assert(bh->state == BUFHD_STATE_CLEAN); freed_size += bh->bl.length(); decrease_size(bh->bl.length()); - dout(6) << "bc: reclaim: clean_size: " << get_clean_size() << " dirty_size: " << get_dirty_size() << " rx_size: " << get_rx_size() << " tx_size: " << get_tx_size() << " age: " << dirty_buffers.get_age() << endl; + dout(6) << "reclaim: clean_size: " << get_clean_size() << " dirty_size: " << get_dirty_size() << " rx_size: " << get_rx_size() << " tx_size: " << get_tx_size() << " age: " << dirty_buffers.get_age() << endl; assert(clean_size >= 0); bh->fc->buffer_map.erase(bh->offset); if (bh->fc->buffer_map.empty()) { diff --git a/ceph/client/Client.cc b/ceph/client/Client.cc index a1058d793db7..54652eb15bb4 100644 --- a/ceph/client/Client.cc +++ b/ceph/client/Client.cc @@ -406,7 +406,8 @@ public: } void finish(int r) { bh->flush_finish(); - if (bh->fc->inflight_buffers.empty()) bh->fc->wakeup_inflight_waiters(); + if (bh->fc->inflight_buffers.empty()) + bh->fc->wakeup_inflight_waiters(); } }; @@ -415,14 +416,14 @@ void Client::flush_inode_buffers(Inode *in) { if (!in->inflight_buffers.empty()) { dout(7) << "inflight buffers of sync write, waiting" << endl; - Cond *cond = new Cond; - in->waitfor_flushed.push_back(cond); - cond->Wait(client_lock); - delete cond; + Cond cond; + in->waitfor_flushed.push_back(&cond); + cond.Wait(client_lock); assert(in->inflight_buffers.empty()); dout(7) << "inflight buffers flushed" << endl; -#ifdef BUFFERCACHE - } else if (!bc.get_fc(in->inode.ino)->dirty_buffers.empty()) { + } + else if (g_conf.client_bcache && + !bc.get_fc(in->inode.ino)->dirty_buffers.empty()) { Filecache *fc = bc.get_fc(in->inode.ino); dout(7) << "bc: flush_inode_buffers: inode " << in->inode.ino << " has " << fc->dirty_buffers.size() << " dirty buffers" << endl; //fc->simplify(); @@ -437,8 +438,8 @@ void Client::flush_inode_buffers(Inode *in) } dout(7) << "flush_inode_buffers: dirty buffers, waiting" << endl; fc->wait_for_inflight(client_lock); -#endif - } else { + } + else { dout(7) << "no inflight buffers" << endl; } } @@ -451,7 +452,8 @@ public: } void finish(int r) { bh->flush_finish(); - if (bh->bc->inflight_buffers.empty()) bh->bc->wakeup_inflight_waiters(); + if (bh->bc->inflight_buffers.empty()) + bh->bc->wakeup_inflight_waiters(); } }; @@ -500,10 +502,11 @@ void Client::trim_bcache() */ void Client::release_inode_buffers(Inode *in) { -#ifdef BUFFERCACHE - // Check first we actually cached the file - if (bc.bcache_map.count(in->inode.ino)) bc.release_file(in->inode.ino); -#endif + if (g_conf.client_bcache) { + // Check first we actually cached the file + if (bc.bcache_map.count(in->inode.ino)) + bc.release_file(in->inode.ino); + } } @@ -1300,14 +1303,12 @@ int Client::read(fileh_t fh, char *buf, size_t size, off_t offset) Inode *in = f->inode; // do we have read file cap? - Cond *cond = 0; while (f->caps & CFILE_CAP_RD == 0) { dout(7) << " don't have read cap, waiting" << endl; - if (!cond) cond = new Cond; - in->waitfor_read.push_back(cond); - cond->Wait(client_lock); + Cond cond; + in->waitfor_read.push_back(&cond); + cond.Wait(client_lock); } - if (cond) delete cond; // determine whether read range overlaps with file @@ -1320,8 +1321,9 @@ int Client::read(fileh_t fh, char *buf, size_t size, off_t offset) if (size > in->inode.size) size = in->inode.size; int rvalue = 0; -#ifndef BUFFERCACHE - { + + if (!g_conf.client_bcache) { + // buffer cache OFF Cond cond; bufferlist blist; // data will go here @@ -1332,70 +1334,72 @@ int Client::read(fileh_t fh, char *buf, size_t size, off_t offset) // copy data into caller's buf blist.copy(0, blist.length(), buf); } - -#else - // map buffercache - map hits, rx, tx; - map::iterator curbuf; - map holes; - map::iterator hole; - - Filecache *fc = bc.get_fc(in->inode.ino); - curbuf = fc->map_existing(size, offset, hits, rx, tx, holes); - - if (curbuf != fc->buffer_map.end() && hits.count(curbuf->first)) { - // sweet -- we can return stuff immediately: find out how much - dout(7) << "read bc hit" << endl; - rvalue = (int)bc.touch_continuous(hits, size, offset); - assert(rvalue > 0); - rvalue = fc->copy_out((size_t)rvalue, offset, buf); - assert(rvalue > 0); - dout(7) << "read bc hit: immediately returning " << rvalue << " bytes" << endl; - } - // issue reads for holes - int hole_rvalue = 0; //FIXME: don't really need to track rvalue in MissFinish context - for (hole = holes.begin(); hole != holes.end(); hole++) { - dout(7) << "read bc miss" << endl; - off_t hole_offset = hole->first; - size_t hole_size = hole->second; - - // insert new bufferhead without allocating buffers (Filer::handle_osd_read_reply allocates them) - Bufferhead *bh = new Bufferhead(in->inode.ino, hole_offset, &bc); - - // read into the buffercache: when finished transition state from inflight to clean - bh->miss_start(hole_size); - C_Client_MissFinish *onfinish = new C_Client_MissFinish(bh, &client_lock, &hole_rvalue); - filer->read(in->inode.ino, g_OSD_FileLayout, hole_size, hole_offset, &(bh->bl), onfinish); - dout(7) << "read bc miss: issued osd read len: " << hole_size << " off: " << hole_offset << endl; + else { + // buffer cache ON + + // map buffercache + map hits, rx, tx; + map::iterator curbuf; + map holes; + map::iterator hole; + + Filecache *fc = bc.get_fc(in->inode.ino); + curbuf = fc->map_existing(size, offset, hits, rx, tx, holes); + + if (curbuf != fc->buffer_map.end() && hits.count(curbuf->first)) { + // sweet -- we can return stuff immediately: find out how much + dout(7) << "read bc hit" << endl; + rvalue = (int)bc.touch_continuous(hits, size, offset); + assert(rvalue > 0); + rvalue = fc->copy_out((size_t)rvalue, offset, buf); + assert(rvalue > 0); + dout(7) << "read bc hit: immediately returning " << rvalue << " bytes" << endl; + } + // issue reads for holes + int hole_rvalue = 0; //FIXME: don't really need to track rvalue in MissFinish context + for (hole = holes.begin(); hole != holes.end(); hole++) { + dout(7) << "read bc miss" << endl; + off_t hole_offset = hole->first; + size_t hole_size = hole->second; + + // insert new bufferhead without allocating buffers (Filer::handle_osd_read_reply allocates them) + Bufferhead *bh = new Bufferhead(in->inode.ino, hole_offset, &bc); + + // read into the buffercache: when finished transition state from inflight to clean + bh->miss_start(hole_size); + C_Client_MissFinish *onfinish = new C_Client_MissFinish(bh, &client_lock, &hole_rvalue); + filer->read(in->inode.ino, g_OSD_FileLayout, hole_size, hole_offset, &(bh->bl), onfinish); + dout(7) << "read bc miss: issued osd read len: " << hole_size << " off: " << hole_offset << endl; + } + + if (rvalue == 0) { + // we need to wait for the first buffer + dout(7) << "read bc miss: waiting for first buffer" << endl; + Bufferhead *bh; + if (curbuf == fc->buffer_map.end() && fc->buffer_map.count(offset)) { + dout(10) << "first buffer is currently read in" << endl; + bh = fc->buffer_map[offset]; + } else { + dout(10) << "first buffer is either hit or inflight" << endl; + bh = curbuf->second; + } + if (bh->state == BUFHD_STATE_RX || bh->state == BUFHD_STATE_TX) { + dout(10) << "waiting for first buffer" << endl; + bh->wait_for_read(client_lock); + } + + // buffer is filled -- see how much we can return + hits.clear(); rx.clear(); tx.clear(); holes.clear(); + fc->map_existing(size, offset, hits, rx, tx, holes); // FIXME: overkill + assert(hits.count(bh->offset)); + rvalue = bc.touch_continuous(hits, size, offset); + fc->copy_out(rvalue, offset, buf); + dout(7) << "read bc no hit: returned first " << rvalue << " bytes" << endl; + + trim_bcache(); + } } - - if (rvalue == 0) { - // we need to wait for the first buffer - dout(7) << "read bc miss: waiting for first buffer" << endl; - Bufferhead *bh; - if (curbuf == fc->buffer_map.end() && fc->buffer_map.count(offset)) { - dout(10) << "first buffer is currently read in" << endl; - bh = fc->buffer_map[offset]; - } else { - dout(10) << "first buffer is either hit or inflight" << endl; - bh = curbuf->second; - } - if (bh->state == BUFHD_STATE_RX || bh->state == BUFHD_STATE_TX) { - dout(10) << "waiting for first buffer" << endl; - bh->wait_for_read(client_lock); - } - - // buffer is filled -- see how much we can return - hits.clear(); rx.clear(); tx.clear(); holes.clear(); - fc->map_existing(size, offset, hits, rx, tx, holes); // FIXME: overkill - assert(hits.count(bh->offset)); - rvalue = bc.touch_continuous(hits, size, offset); - fc->copy_out(rvalue, offset, buf); - dout(7) << "read bc no hit: returned first " << rvalue << " bytes" << endl; - trim_bcache(); - } -#endif // done! client_lock.Unlock(); return rvalue; @@ -1449,70 +1453,69 @@ int Client::write(fileh_t fh, const char *buf, size_t size, off_t offset) } -#ifdef BUFFERCACHE - // buffered write? - if (f->caps & CFILE_CAP_WRBUFFER) { + if (g_conf.client_bcache && // buffer cache ON? + f->caps & CFILE_CAP_WRBUFFER) { // caps buffered write? // buffered write dout(7) << "buffered/async write" << endl; - - // map buffercache for writing - map buffers, rx, tx; - bc.map_or_alloc(in->inode.ino, size, offset, buffers, rx, tx); - - // wait for rx and tx buffers -- FIXME: don't need to wait for tx buffers - while (!rx.empty() || !tx.empty()) { - if (!rx.empty()) { - rx.begin()->second->wait_for_write(client_lock); - } else { - tx.begin()->second->wait_for_write(client_lock); - } - buffers.clear(); tx.clear(); rx.clear(); - bc.map_or_alloc(in->inode.ino, size, offset, buffers, rx, tx); // FIXME: overkill - } - bc.dirty(in->inode.ino, size, offset, buf); - - trim_bcache(); - - /* - hack for now.. replace this with a real buffer cache - - just copy the buffer, send the write off, and return immediately. - flush() will block until all outstanding writes complete. - */ + + // map buffercache for writing + map buffers, rx, tx; + bc.map_or_alloc(in->inode.ino, size, offset, buffers, rx, tx); + + // wait for rx and tx buffers -- FIXME: don't need to wait for tx buffers + while (!rx.empty() || !tx.empty()) { + if (!rx.empty()) { + rx.begin()->second->wait_for_write(client_lock); + } else { + tx.begin()->second->wait_for_write(client_lock); + } + buffers.clear(); tx.clear(); rx.clear(); + bc.map_or_alloc(in->inode.ino, size, offset, buffers, rx, tx); // FIXME: overkill + } + bc.dirty(in->inode.ino, size, offset, buf); + + trim_bcache(); + + /* + hack for now.. replace this with a real buffer cache + + just copy the buffer, send the write off, and return immediately. + flush() will block until all outstanding writes complete. + */ /* this totally sucks, just do synchronous writes! - bufferlist *blist = new bufferlist; - blist->push_back( new buffer(buf, size, BUFFER_MODE_COPY|BUFFER_MODE_FREE) ); - - in->inflight_buffers.insert(blist); - - Context *onfinish = new C_Client_WriteBuffer( in, blist ); - filer->write(in->inode.ino, g_OSD_FileLayout, size, offset, *blist, 0, onfinish); + bufferlist *blist = new bufferlist; + blist->push_back( new buffer(buf, size, BUFFER_MODE_COPY|BUFFER_MODE_FREE) ); + + in->inflight_buffers.insert(blist); + + Context *onfinish = new C_Client_WriteBuffer( in, blist ); + filer->write(in->inode.ino, g_OSD_FileLayout, size, offset, *blist, 0, onfinish); */ - + } else { -#else - { -#endif // synchronous write // FIXME: do not bypass buffercache - dout(7) << "synchronous write" << endl; - - // create a buffer that refers to *buf, but doesn't try to free it when it's done. - bufferlist blist; - blist.push_back( new buffer(buf, size, BUFFER_MODE_NOCOPY|BUFFER_MODE_NOFREE) ); - - // issue write - Cond cond; - int rvalue; - - C_Client_Cond *onfinish = new C_Client_Cond(&cond, &client_lock, &rvalue); - filer->write(in->inode.ino, g_OSD_FileLayout, size, offset, blist, 0, onfinish); - - cond.Wait(client_lock); - } -#if 0 + //if (g_conf.client_bcache) { + // write me + //} else + { + dout(7) << "synchronous write" << endl; + + // create a buffer that refers to *buf, but doesn't try to free it when it's done. + bufferlist blist; + blist.push_back( new buffer(buf, size, BUFFER_MODE_NOCOPY|BUFFER_MODE_NOFREE) ); + + // issue write + Cond cond; + int rvalue; + + C_Client_Cond *onfinish = new C_Client_Cond(&cond, &client_lock, &rvalue); + filer->write(in->inode.ino, g_OSD_FileLayout, size, offset, blist, 0, onfinish); + + cond.Wait(client_lock); + } } -#endif + // assume success for now. FIXME. diff --git a/ceph/config.cc b/ceph/config.cc index 0b430a47d07c..4ff816691dba 100644 --- a/ceph/config.cc +++ b/ceph/config.cc @@ -32,8 +32,8 @@ OSDFileLayout g_OSD_MDLogLayout( 1<<7, 32, 1<<20 ); // new (good?) way md_config_t g_conf = { - num_mds: 2, - num_osd: 5, + num_mds: 1, + num_osd: 4, num_client: 1, // profiling and debugging @@ -61,6 +61,9 @@ md_config_t g_conf = { client_cache_stat_ttl: 10, // seconds until cached stat results become invalid client_use_random_mds: false, + client_sync_writes: 0, + + client_bcache: 1, client_bcache_alloc_minsize: 1024, client_bcache_alloc_maxsize: 262144, client_bcache_ttl: 30, // seconds until dirty buffers are written to disk @@ -223,6 +226,12 @@ void parse_config_options(int argc, char **argv, else if (strcmp(argv[i], "--fuse_direct_io") == 0) g_conf.fuse_direct_io = atoi(argv[++i]); + else if (strcmp(argv[i], "--client_sync_writes") == 0) + g_conf.client_sync_writes = atoi(argv[++i]); + else if (strcmp(argv[i], "--client_bcache") == 0) + g_conf.client_bcache = atoi(argv[++i]); + + else if (strcmp(argv[i], "--osd_nrep") == 0) g_conf.osd_nrep = atoi(argv[++i]); else if (strcmp(argv[i], "--osd_fsync") == 0) diff --git a/ceph/config.h b/ceph/config.h index 02fe382b8d99..4563340fa322 100644 --- a/ceph/config.h +++ b/ceph/config.h @@ -36,6 +36,8 @@ struct md_config_t { int client_cache_stat_ttl; bool client_use_random_mds; // debug flag + bool client_sync_writes; + bool client_bcache; int client_bcache_alloc_minsize; int client_bcache_alloc_maxsize; int client_bcache_ttl; diff --git a/ceph/msg/FakeMessenger.cc b/ceph/msg/FakeMessenger.cc index 787e177e85e3..921636a95879 100644 --- a/ceph/msg/FakeMessenger.cc +++ b/ceph/msg/FakeMessenger.cc @@ -228,6 +228,7 @@ int FakeMessenger::shutdown() lock.Lock(); directory.erase(whoami); if (directory.empty()) { + dout(1) << "fakemessenger: last shutdown" << endl; ::shutdown = true; cond.Signal(); // why not } diff --git a/ceph/osd/OSD.cc b/ceph/osd/OSD.cc index a6a152659a9d..d09ddacf0db9 100644 --- a/ceph/osd/OSD.cc +++ b/ceph/osd/OSD.cc @@ -100,12 +100,12 @@ OSD::OSD(int id, Messenger *m) OSD::~OSD() { + if (threadpool) { delete threadpool; threadpool = 0; } if (osdcluster) { delete osdcluster; osdcluster = 0; } if (monitor) { delete monitor; monitor = 0; } if (messenger) { delete messenger; messenger = 0; } if (logger) { delete logger; logger = 0; } if (store) { delete store; store = 0; } - if (threadpool) { delete threadpool; threadpool = 0; } } int OSD::init()