From 0b90cc57faf5536cc6d9c67f02f87a467cd7c97e Mon Sep 17 00:00:00 2001 From: sageweil Date: Mon, 20 Nov 2006 20:51:49 +0000 Subject: [PATCH] git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@960 29311d96-e01e-0410-9327-a35deaab8ce9 --- ceph/TODO | 33 +++++++++++++++- ceph/osdc/Blinker.h | 91 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 2 deletions(-) create mode 100644 ceph/osdc/Blinker.h diff --git a/ceph/TODO b/ceph/TODO index 755c36fd40237..c3391f691330b 100644 --- a/ceph/TODO +++ b/ceph/TODO @@ -1,6 +1,13 @@ == todo +papers to read +- gribble et al 2000, scalable distributed hash table +- sagiv blink trees +- johnson and colbrook's DE and DB-trees (maybe fewer locks?) + + + - messenger lookup() and failure() upcalls - how to get usage feedback to monitor? @@ -9,6 +16,26 @@ - no more rank! make it a uniquish nonce? +osd +- pull out "object" service +- btree service +- interject snapshot abstraction. inbetween? + - how to generalize pg log entries? + - or put snapshotting in apply_operation? + +osdc +- distributed btree thing: Blinker! + +mds +- rewrite mdstore to use osd btree service + +journaler +- should we pad with zeros to avoid splitting individual entries? + - make it a g_conf flag? + - have to fix reader to skip over zeros (either <4 bytes for size, or zeroed sizes) +- need to truncate at detected (valid) write_pos to clear out any other partial trailing writes + + monitor ?- monitor user lib that handles resending, redirection of mon requests. @@ -25,13 +52,14 @@ osdmon mdsmon osd/rados +- pg_num instead of pg_bits - flag missing log entries on crash recovery --> WRNOOP? or WRLOST? - consider implications of nvram writeahead logs - fix heartbeat wrt new replication - mark residual pgs obsolete ??? - rdlocks - optimize remove wrt recovery pushes -- pg_bit changes +- pg_bit/pg_num changes - report crashed pgs? 
messenger @@ -74,7 +102,8 @@ bugs/stability general - timer needs cancel sets, schedulers need to cancel outstanding events on shutdown - - well, just figure out general timer cancellation strategy that avoids races +- well, just figure out general timer cancellation strategy that avoids races + - use updated Timer as a model? remaining hard problems
diff --git a/ceph/osdc/Blinker.h b/ceph/osdc/Blinker.h
new file mode 100644
index 0000000000000..231fe47fb1e31
--- /dev/null
+++ b/ceph/osdc/Blinker.h
@@ -0,0 +1,91 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef __BLINKER_H
+#define __BLINKER_H
+
+// Blinker: interface to the distributed btree (TODO: "osdc - distributed btree thing: Blinker!").
+class Blinker {
+ public:
+  // Base record for one tree operation; 'op' holds one of the opcode constants below.
+  class Op {
+  public:  // subclasses must be able to call Op(int) and name the opcodes (Op::LOOKUP, ...)
+    int op;
+    static const int LOOKUP = 1;
+    static const int INSERT = 2;
+    static const int REMOVE = 3;
+    static const int CLEAR = 4;
+    Op(int o) : op(o) {}
+  };
+
+  // LOOKUP operation record: carries the key.
+  class OpLookup : public Op {
+  public:
+    bufferptr key;
+    OpLookup(bufferptr& k) : Op(Op::LOOKUP), key(k) {}
+  };
+
+  // INSERT operation record: carries the key and the value.
+  class OpInsert : public Op {
+  public:
+    bufferptr key;
+    bufferlist val;
+    OpInsert(bufferptr& k, bufferlist& v) : Op(Op::INSERT), key(k), val(v) {}
+  };
+
+  // REMOVE operation record: carries the key.
+  class OpRemove : public Op {
+  public:
+    bufferptr key;
+    OpRemove(bufferptr& k) : Op(Op::REMOVE), key(k) {}
+  };
+
+  // CLEAR operation record: carries no payload.
+  class OpClear : public Op {
+  public:
+    OpClear() : Op(Op::CLEAR) {}
+  };
+
+private:
+  Objecter *objecter;
+
+  // in-flight operations.
+
+  // cache information about tree structure.
+
+public:
+  // public interface
+
+  // simple accessors
+  void lookup(inode_t& inode, bufferptr& key, bufferlist *pval, Context *onfinish);
+
+  // simple modifiers
+  void insert(inode_t& inode, bufferptr& key, bufferlist& val, Context *onack, Context *onsafe);
+  void remove(inode_t& inode, bufferptr& key, Context *onack, Context *onsafe);
+  void clear(inode_t& inode, Context *onack, Context *onsafe);
+
+  // these are dangerous: the table may be large.
+  // NOTE(review): list element types assumed from the key/val types used above; confirm.
+  void listkeys(inode_t& inode, list<bufferptr>* pkeys, Context *onfinish);
+  void listvals(inode_t& inode, list<bufferptr>* pkeys, list<bufferlist>* pvals, Context *onfinish);
+
+  // fetch *at least* key, but also anything else that is convenient.
+  // include lexical bounds for which this is a complete result.
+  // (if *start and *end are empty, it's the entire table)
+  void prefetch(inode_t& inode, bufferptr& key,
+                list<bufferptr>* pkeys, list<bufferlist>* pvals,
+                bufferptr *start, bufferptr *end,
+                Context *onfinish);
+};
+
+#endif
-- 
2.39.5