From 2fa46c1f903734a9097e8ed183cd6604fa6ed60f Mon Sep 17 00:00:00 2001 From: sageweil Date: Fri, 13 Jul 2007 17:29:26 +0000 Subject: [PATCH] * fixed a bug in buffer.h! yay! should be much more memory efficient now, too. git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1496 29311d96-e01e-0410-9327-a35deaab8ce9 --- trunk/ceph/TODO | 9 +++++ trunk/ceph/include/buffer.h | 66 +++++++++++++++++---------------- trunk/ceph/include/types.h | 2 +- trunk/ceph/messages/MOSDMap.h | 6 +-- trunk/ceph/mon/ClientMonitor.cc | 2 +- trunk/ceph/mon/MDSMonitor.cc | 2 +- trunk/ceph/mon/OSDMonitor.cc | 18 ++++----- trunk/ceph/mon/OSDMonitor.h | 2 +- trunk/ceph/osd/FakeStore.cc | 2 +- trunk/ceph/osd/FakeStore.h | 2 +- trunk/ceph/osd/OSD.cc | 7 +++- trunk/ceph/osd/OSDMap.h | 15 +++----- trunk/ceph/osd/ObjectStore.h | 10 +---- 13 files changed, 73 insertions(+), 70 deletions(-) diff --git a/trunk/ceph/TODO b/trunk/ceph/TODO index c14d187625da8..d195ef9fd00d9 100644 --- a/trunk/ceph/TODO +++ b/trunk/ceph/TODO @@ -1,5 +1,11 @@ +- change same_inst_since to align with "in" set +- tag MClientRequest with mdsmap v +- push new mdsmap to clients on send_message_client, based on the tag? + - hrm, what about exports and stale caps wonkiness... there's a race with the REAP. hmm. + + some smallish projects: - crush rewrite in C @@ -232,6 +238,9 @@ osd/rados - pg_bit/pg_num changes - report crashed pgs? +messenger +- fix messenger shutdown.. we shouldn't delete messenger, since the caller may be referencing it, etc. + simplemessenger - close idle connections - buffer sent messages until a receive is acknowledged (handshake!) diff --git a/trunk/ceph/include/buffer.h b/trunk/ceph/include/buffer.h index 1f61d3b892ba5..cafbbc62f90b6 100644 --- a/trunk/ceph/include/buffer.h +++ b/trunk/ceph/include/buffer.h @@ -267,7 +267,14 @@ public: char *c_str() { assert(_raw); return _raw->data + _off; } unsigned length() const { return _len; } unsigned offset() const { return _off; } - unsigned unused_tail_length() const { return _raw->len - (_off+_len); } + unsigned start() const { return _off; } + unsigned end() const { return _off + _len; } + unsigned unused_tail_length() const { + if (_raw) + return _raw->len - (_off+_len); + else + return 0; + } const char& operator[](unsigned n) const { assert(_raw); assert(n < _len); @@ -332,6 +339,7 @@ public: // my private bits std::list _buffers; unsigned _len; + ptr append_buffer; // where i put small appends. public: // cons/des @@ -352,10 +360,10 @@ public: const std::list& buffers() const { return _buffers; } unsigned length() const { -#if 0 +#if 1 // DEBUG: verify _len unsigned len = 0; - for (std::list::iterator it = _buffers.begin(); + for (std::list::const_iterator it = _buffers.begin(); it != _buffers.end(); it++) { len += (*it).length(); @@ -508,33 +516,23 @@ public: void append(const char *data, unsigned len) { - if (len == 0) return; - - unsigned alen = 0; - - // copy into the tail buffer? - if (!_buffers.empty()) { - unsigned avail = _buffers.back().unused_tail_length(); - if (avail > 0) { - //std::cout << "copying up to " << len << " into tail " << avail << " bytes of tail buf " << _buffers.back() << std::endl; - if (avail > len) - avail = len; - _buffers.back().append(data, avail); - _len += avail; - data += avail; - len -= avail; + while (len > 0) { + // put what we can into the existing append_buffer. + if (append_buffer.unused_tail_length() > 0) { + unsigned gap = append_buffer.unused_tail_length(); + if (gap > len) gap = len; + append_buffer.append(data, gap); + append(append_buffer, append_buffer.end() - gap, gap); // add segment to the list + len -= gap; + data += gap; } - alen = _buffers.back().length(); + if (len == 0) break; // done! + + // make a new append_buffer! + unsigned alen = BUFFER_PAGE_SIZE * (((len-1) / BUFFER_PAGE_SIZE) + 1); + append_buffer = create_page_aligned(alen); + append_buffer.set_length(0); // unused, so far. } - if (len == 0) return; - - // just add another buffer. - // alloc a bit extra, in case we do a bunch of appends. FIXME be smarter! - if (alen < 4096) alen = 4096; - ptr bp = create(alen); - bp.set_length(len); - bp.copy_in(0, len, data); - push_back(bp); } void append(ptr& bp) { push_back(bp); @@ -545,8 +543,11 @@ public: push_back(tempbp); } void append(const list& bl) { - list temp(bl); // copy list - claim_append(temp); // and append + _len += bl._len; + for (std::list::const_iterator p = bl._buffers.begin(); + p != bl._buffers.end(); + ++p) + _buffers.push_back(*p); } @@ -586,12 +587,12 @@ public: } } - void substr_of(list& other, unsigned off, unsigned len) { + void substr_of(const list& other, unsigned off, unsigned len) { assert(off + len <= other.length()); clear(); // skip off - std::list::iterator curbuf = other._buffers.begin(); + std::list::const_iterator curbuf = other._buffers.begin(); while (off > 0) { assert(curbuf != _buffers.end()); if (off >= (*curbuf).length()) { @@ -953,6 +954,7 @@ inline void _decode(bufferptr& bp, bufferlist& bl, int& off) inline void _encode(const bufferlist& s, bufferlist& bl) { uint32_t len = s.length(); + cout << "_encode bufferlist len " << len << endl; _encoderaw(len, bl); bl.append(s); } diff --git a/trunk/ceph/include/types.h b/trunk/ceph/include/types.h index 8b778c90aa7a8..d6bdbff2983db 100644 --- a/trunk/ceph/include/types.h +++ b/trunk/ceph/include/types.h @@ -258,7 +258,7 @@ struct inode_t { bool anchored; // auth only? // file (data access) - off_t size, max_size; + off_t size, max_size, allocated_size; utime_t mtime; // file data modify time. utime_t atime; // file data access time. diff --git a/trunk/ceph/messages/MOSDMap.h b/trunk/ceph/messages/MOSDMap.h index 83929ddd23c28..b6de1b027557c 100644 --- a/trunk/ceph/messages/MOSDMap.h +++ b/trunk/ceph/messages/MOSDMap.h @@ -45,10 +45,8 @@ class MOSDMap : public Message { } - MOSDMap() : - Message(MSG_OSD_MAP) {} - MOSDMap(OSDMap *oc) : - Message(MSG_OSD_MAP) { + MOSDMap() : Message(MSG_OSD_MAP) { } + MOSDMap(OSDMap *oc) : Message(MSG_OSD_MAP) { oc->encode(maps[oc->get_epoch()]); } diff --git a/trunk/ceph/mon/ClientMonitor.cc b/trunk/ceph/mon/ClientMonitor.cc index bfdede66b8ca7..175f70477f7a5 100644 --- a/trunk/ceph/mon/ClientMonitor.cc +++ b/trunk/ceph/mon/ClientMonitor.cc @@ -211,7 +211,7 @@ void ClientMonitor::_mounted(int client, MClientMount *m) // reply with latest mds, osd maps mon->mdsmon->send_latest(to); - mon->osdmon->send_latest(0, to); + mon->osdmon->send_latest(to); delete m; } diff --git a/trunk/ceph/mon/MDSMonitor.cc b/trunk/ceph/mon/MDSMonitor.cc index 2e5ff250c1ac6..7578ddc99c356 100644 --- a/trunk/ceph/mon/MDSMonitor.cc +++ b/trunk/ceph/mon/MDSMonitor.cc @@ -355,7 +355,7 @@ void MDSMonitor::_updated(int from, MMDSBeacon *m) { if (m->get_state() == MDSMap::STATE_BOOT) { dout(10) << "_updated (booted) mds" << from << " " << *m << endl; - mon->osdmon->send_latest(0, mdsmap.get_inst(from)); + mon->osdmon->send_latest(mdsmap.get_inst(from)); } else { dout(10) << "_updated mds" << from << " " << *m << endl; } diff --git a/trunk/ceph/mon/OSDMonitor.cc b/trunk/ceph/mon/OSDMonitor.cc index 0dc188932dbab..11b2f59d1771d 100644 --- a/trunk/ceph/mon/OSDMonitor.cc +++ b/trunk/ceph/mon/OSDMonitor.cc @@ -76,7 +76,7 @@ void OSDMonitor::fake_osdmap_update() // tell a random osd int osd = rand() % g_conf.num_osd; - send_latest(0, osdmap.get_inst(osd)); + send_latest(osdmap.get_inst(osd)); } @@ -93,7 +93,7 @@ void OSDMonitor::fake_reorg() } propose_pending(); - send_latest(0, osdmap.get_inst(r)); // after + send_latest(osdmap.get_inst(r)); // after } @@ -464,7 +464,7 @@ bool OSDMonitor::prepare_failure(MOSDFailure *m) void OSDMonitor::_reported_failure(MOSDFailure *m) { dout(7) << "_reported_failure on " << m->get_failed() << ", telling " << m->get_from() << endl; - send_latest(m->get_epoch(), m->get_from()); + send_latest(m->get_from(), m->get_epoch()); } @@ -526,7 +526,7 @@ bool OSDMonitor::prepare_boot(MOSDBoot *m) void OSDMonitor::_booted(MOSDBoot *m) { dout(7) << "_booted " << m->inst << endl; - send_latest(m->sb.current_epoch, m->inst); + send_latest(m->inst, m->sb.current_epoch); delete m; } @@ -576,14 +576,14 @@ void OSDMonitor::send_to_waiting() } } -void OSDMonitor::send_latest(epoch_t since, entity_inst_t who) +void OSDMonitor::send_latest(entity_inst_t who, epoch_t since) { if (paxos->is_readable()) { dout(5) << "send_latest to " << who << " now" << endl; - if (since) - send_incremental(since, who); - else + if (since == (epoch_t)(-1)) send_full(who); + else + send_incremental(since, who); } else { dout(5) << "send_latest to " << who << " later" << endl; awaiting_map[who.name].first = who; @@ -610,7 +610,7 @@ void OSDMonitor::send_incremental(epoch_t since, entity_inst_t dest) e--) { bufferlist bl; if (mon->store->get_bl_sn(bl, "osdmap", e) > 0) { - dout(20) << "send_incremental inc " << e << endl; + dout(20) << "send_incremental inc " << e << " " << bl.length() << " bytes" << endl; m->incremental_maps[e] = bl; } else if (mon->store->get_bl_sn(bl, "osdmap_full", e) > 0) { diff --git a/trunk/ceph/mon/OSDMonitor.h b/trunk/ceph/mon/OSDMonitor.h index 9927cf805f2bc..59424a6fbe9e8 100644 --- a/trunk/ceph/mon/OSDMonitor.h +++ b/trunk/ceph/mon/OSDMonitor.h @@ -114,7 +114,7 @@ private: void mark_all_down(); - void send_latest(epoch_t since, entity_inst_t i); + void send_latest(entity_inst_t i, epoch_t since=(epoch_t)(-1)); void fake_osd_failure(int osd, bool down); void fake_osdmap_update(); diff --git a/trunk/ceph/osd/FakeStore.cc b/trunk/ceph/osd/FakeStore.cc index 9e56e481dbdfd..2ee0201bc9d7e 100644 --- a/trunk/ceph/osd/FakeStore.cc +++ b/trunk/ceph/osd/FakeStore.cc @@ -298,7 +298,7 @@ int FakeStore::read(object_t oid, int FakeStore::write(object_t oid, off_t offset, size_t len, - bufferlist& bl, + const bufferlist& bl, Context *onsafe) { dout(20) << "write " << oid << " len " << len << " off " << offset << endl; diff --git a/trunk/ceph/osd/FakeStore.h b/trunk/ceph/osd/FakeStore.h index 95ecae3f68af4..e88c205315bc0 100644 --- a/trunk/ceph/osd/FakeStore.h +++ b/trunk/ceph/osd/FakeStore.h @@ -78,7 +78,7 @@ class FakeStore : public ObjectStore { int remove(object_t oid, Context *onsafe); int truncate(object_t oid, off_t size, Context *onsafe); int read(object_t oid, off_t offset, size_t len, bufferlist& bl); - int write(object_t oid, off_t offset, size_t len, bufferlist& bl, Context *onsafe); + int write(object_t oid, off_t offset, size_t len, const bufferlist& bl, Context *onsafe); void sync(); void sync(Context *onsafe); diff --git a/trunk/ceph/osd/OSD.cc b/trunk/ceph/osd/OSD.cc index a857a068d76a3..0db85839b1f9f 100644 --- a/trunk/ceph/osd/OSD.cc +++ b/trunk/ceph/osd/OSD.cc @@ -894,10 +894,13 @@ void OSD::handle_osd_map(MOSDMap *m) dout(10) << "handle_osd_map decoding inc map epoch " << cur+1 << dendl; bufferlist bl; - if (m->incremental_maps.count(cur+1)) + if (m->incremental_maps.count(cur+1)) { + dout(10) << " using provided inc map" << endl; bl = m->incremental_maps[cur+1]; - else + } else { + dout(10) << " using my locally stored inc map" << endl; get_inc_map_bl(cur+1, bl); + } OSDMap::Incremental inc; int off = 0; diff --git a/trunk/ceph/osd/OSDMap.h b/trunk/ceph/osd/OSDMap.h index f27146d559a82..5803d55da2687 100644 --- a/trunk/ceph/osd/OSDMap.h +++ b/trunk/ceph/osd/OSDMap.h @@ -83,9 +83,9 @@ public: list old_overload; // no longer overload void encode(bufferlist& bl) { - bl.append((char*)&epoch, sizeof(epoch)); - bl.append((char*)&mon_epoch, sizeof(mon_epoch)); - bl.append((char*)&ctime, sizeof(ctime)); + ::_encode(epoch, bl); + ::_encode(mon_epoch, bl); + ::_encode(ctime, bl); ::_encode(new_up, bl); ::_encode(new_down, bl); ::_encode(new_in, bl); @@ -94,12 +94,9 @@ public: ::_encode(fullmap, bl); } void decode(bufferlist& bl, int& off) { - bl.copy(off, sizeof(epoch), (char*)&epoch); - off += sizeof(epoch); - bl.copy(off, sizeof(mon_epoch), (char*)&mon_epoch); - off += sizeof(mon_epoch); - bl.copy(off, sizeof(ctime), (char*)&ctime); - off += sizeof(ctime); + ::_decode(epoch, bl, off); + ::_decode(mon_epoch, bl, off); + ::_decode(ctime, bl, off); ::_decode(new_up, bl, off); ::_decode(new_down, bl, off); ::_decode(new_in, bl, off); diff --git a/trunk/ceph/osd/ObjectStore.h b/trunk/ceph/osd/ObjectStore.h index 74818e0470526..7f6dc7f0afae7 100644 --- a/trunk/ceph/osd/ObjectStore.h +++ b/trunk/ceph/osd/ObjectStore.h @@ -153,7 +153,7 @@ public: pattrsets.push_back(&aset); } - void write(object_t oid, off_t off, size_t len, bufferlist& bl) { + void write(object_t oid, off_t off, size_t len, const bufferlist& bl) { int op = OP_WRITE; ops.push_back(op); oids.push_back(oid); @@ -474,15 +474,9 @@ public: virtual int read(object_t oid, off_t offset, size_t len, bufferlist& bl) = 0; - - /*virtual int write(object_t oid, - off_t offset, size_t len, - bufferlist& bl, - bool fsync=true) = 0; - */ virtual int write(object_t oid, off_t offset, size_t len, - bufferlist& bl, + const bufferlist& bl, Context *onsafe) = 0;//{ return -1; } virtual void trim_from_cache(object_t oid, off_t offset, size_t len) { } -- 2.39.5