md_config_t g_conf = {
num_mds: 33,
num_osd: 10,
- num_client: 1000,
+ num_client: 10,
osd_cow: false, // crashy? true,
fake_clock: true,
fakemessenger_serialize: true,
- debug: 13,
+ debug: 10,
mdcache_size: MDS_CACHE_SIZE,
mdcache_mid: .8,
class CInode;
class CDir;
+#define CDENTRY_STATE_FROZEN 1
+
// dentry
class CDentry {
protected:
string name;
CInode *inode;
CDir *dir;
+ int state;
friend class MDCache;
friend class MDS;
CDentry() {
inode = NULL;
dir = NULL;
+ state = 0;
}
CDentry(string& n, CInode *in) {
name = n;
inode = in;
+ // Match the default constructor: never leave the parent-dir pointer
+ // uninitialized, so a dentry that has not been given a dir reads as NULL
+ // instead of garbage.
+ dir = NULL;
+ state = 0;
}
CInode *get_inode() {
bool operator>= (const CDentry& right) const;
bool operator<= (const CDentry& right) const;
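+ // -- locking (not enabled yet)
+ // NOTE(review): the sketch below tests CDENTRY_STATE_FROZENDENTRY, but the only
+ // dentry state bit defined in this change is CDENTRY_STATE_FROZEN -- reconcile
+ // the names before uncommenting.
+ //bool is_frozen() { return is_frozen_dentry() || dir->is_frozen_dir(); }
+ //bool is_frozen_dentry() { return state & CDENTRY_STATE_FROZENDENTRY; }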
+
// -- hierarchy
void remove();
if (dir.get_dir_auth() != CDIR_AUTH_PARENT)
out << " dir_auth=" << dir.get_dir_auth();
- return out << " state=" << dir.get_state();
+ out << " state=" << dir.get_state();
return out << "]";
}
// state bits
-#define CDIR_STATE_COMPLETE (1<<0) // the complete contents are in cache
-#define CDIR_STATE_DIRTY (1<<1) // has been modified since last commit
+#define CDIR_STATE_AUTH (1<<0) // auth for this dir (hashing doesn't count)
+#define CDIR_STATE_PROXY (1<<1) // proxy auth
-#define CDIR_STATE_FROZENTREE (1<<2) // root of tree (bounded by exports)
-#define CDIR_STATE_FREEZINGTREE (1<<3) // in process of freezing
-#define CDIR_STATE_FROZENDIR (1<<4)
-#define CDIR_STATE_FREEZINGDIR (1<<5)
+#define CDIR_STATE_COMPLETE (1<<2) // the complete contents are in cache
+#define CDIR_STATE_DIRTY (1<<3) // has been modified since last commit
-#define CDIR_STATE_COMMITTING (1<<6) // mid-commit
-#define CDIR_STATE_FETCHING (1<<7) // currenting fetching
+#define CDIR_STATE_FROZENTREE (1<<4) // root of tree (bounded by exports)
+#define CDIR_STATE_FREEZINGTREE (1<<5) // in process of freezing
+#define CDIR_STATE_FROZENDIR (1<<6)
+#define CDIR_STATE_FREEZINGDIR (1<<7)
-#define CDIR_STATE_IMPORT (1<<8) // flag set if this is an import.
-#define CDIR_STATE_EXPORT (1<<9)
-#define CDIR_STATE_AUTH (1<<10) // auth for this dir (hashing doesn't count)
-#define CDIR_STATE_PROXY (1<<11)
+#define CDIR_STATE_COMMITTING (1<<8) // mid-commit
+#define CDIR_STATE_FETCHING (1<<9) // currently fetching
+
+#define CDIR_STATE_IMPORT (1<<10) // flag set if this is an import.
+#define CDIR_STATE_EXPORT (1<<11)
#define CDIR_STATE_HASHED (1<<12) // if hashed. only hashed+auth on auth node.
#define CDIR_STATE_HASHING (1<<13)
version = 0;
- this->auth = auth; // by default.
+ //this->auth = auth; // by default.
+ state_set(CINODE_STATE_AUTH);
}
CInode::~CInode() {
void CInode::set_auth(bool a)
{
if (!is_dangling() && !is_root() &&
- auth != a) {
+ is_auth() != a) { // only adjust the parent's count when the auth bit actually flips
CDir *dir = get_parent_dir();
- if (auth && !a)
+ if (is_auth() && !a) // auth -> non-auth; the else branch is non-auth -> auth
dir->nauthitems--;
else
dir->nauthitems++;
}
- auth = a;
+
+ if (a) state_set(CINODE_STATE_AUTH); // auth is recorded as a bit in the state mask, not a separate bool
+ else state_clear(CINODE_STATE_AUTH);
}
#include <sys/stat.h>
#include "CDentry.h"
+#include "Lock.h"
#include <cassert>
#include <list>
// state
-#define CINODE_STATE_ROOT 1
-#define CINODE_STATE_DIRTY 2
-#define CINODE_STATE_UNSAFE 4 // not logged yet
-#define CINODE_STATE_DANGLING 8 // delete me when i expire; i have no dentry
-#define CINODE_STATE_UNLINKING 16
-#define CINODE_STATE_PROXY 32 // can't expire yet
-#define CINODE_STATE_EXPORTING 64 // on nonauth bystander.
+#define CINODE_STATE_AUTH (1<<0) // this copy is authoritative; tested by is_auth()
+#define CINODE_STATE_ROOT (1<<1) // tested by is_root()
-#define CINODE_STATE_RENAMING 128 // moving me
-#define CINODE_STATE_RENAMINGTO 256 // rename target (will be unlinked)
+#define CINODE_STATE_DIRTY (1<<2)
+#define CINODE_STATE_UNSAFE (1<<3) // not logged yet
+#define CINODE_STATE_DANGLING (1<<4) // delete me when i expire; i have no dentry
+#define CINODE_STATE_UNLINKING (1<<5)
+#define CINODE_STATE_PROXY (1<<6) // can't expire yet
+#define CINODE_STATE_EXPORTING (1<<7) // on nonauth bystander.
+
+#define CINODE_STATE_RENAMING (1<<8) // moving me
+#define CINODE_STATE_RENAMINGTO (1<<9) // rename target (will be unlinked)
// misc
CInode *lru_next, *lru_prev;
// -- distributed caching
- bool auth; // safety check; true if this is authoritative.
+ //bool auth; // safety check; true if this is authoritative.
set<int> cached_by; // mds's that cache me.
/* NOTE: on replicas, this doubles as replicated_by, but the
cached_by_* access methods below should NOT be used in those
bool is_root() { return state & CINODE_STATE_ROOT; }
bool is_proxy() { return state & CINODE_STATE_PROXY; }
- bool is_auth() { return auth; }
+ bool is_auth() { return state & CINODE_STATE_AUTH; }
void set_auth(bool auth);
- bool is_replica() { return !auth; }
+ bool is_replica() { return !is_auth(); }
int get_replica_nonce() { assert(!is_auth()); return replica_nonce; }
inodeno_t ino() { return inode.ino; }
--- /dev/null
+#ifndef __LOCK_H
+#define __LOCK_H
+
+#include <assert.h>
+#include <set>
+using namespace std;
+
+// STATES
+// basic lock
+#define LOCK_SYNC 0
+#define LOCK_PRELOCK 1
+#define LOCK_LOCK 2
+#define LOCK_DELETING 3 // auth only
+#define LOCK_DELETED 4
+
+// async lock
+#define LOCK_ASYNC 5
+#define LOCK_RESYNC 6 // to sync
+#define LOCK_RESYNC2 7 // to lock
+
+
+// -- basic lock
+
+// Lock state (one of the LOCK_* values) plus a gather set of ints.
+// A default-constructed lock starts in state 0, i.e. LOCK_SYNC.
+class BasicLock {
+ protected:
+  // lock state
+  char state;
+  set<int> gather_set;   // auth
+
+ public:
+  BasicLock() : state(0) {
+  }
+
+  char get_state() const { return state; }
+
+  // Stores s as the new state and returns it.  (The function is declared to
+  // return char, so it must return a value; falling off the end of a
+  // non-void function is undefined behavior.)
+  char set_state(char s) { state = s; return state; }
+
+  set<int>& get_gather_set() { return gather_set; }
+
+  // Replaces the gather set with a copy of i.
+  void init_gather(set<int>& i) {
+    gather_set = i;
+  }
+
+  // auth:     readable in SYNC, PRELOCK and LOCK.
+  // non-auth: readable only in SYNC.
+  bool can_read(bool auth) const {
+    if (auth)
+      return (state == LOCK_SYNC) || (state == LOCK_PRELOCK) || (state == LOCK_LOCK);
+    return (state == LOCK_SYNC);
+  }
+
+  // Only the auth may write, and only in LOCK.
+  bool can_write(bool auth) const {
+    return auth && state == LOCK_LOCK;
+  }
+};
+
+// Prints a lock as
+//   Lock(<state>[ g=<gather set>] <auth r/w>/<non-auth r/w>)
+//
+// Declared inline because it is defined in a header: without it, every
+// translation unit that includes this file emits its own definition and the
+// link fails with a multiple-definition error.
+//
+// NOTE: l is a BasicLock& and can_read()/can_write() are not virtual, so the
+// r/w flags printed here are always BasicLock's, even when l refers to a
+// derived lock that redefines them.
+inline ostream& operator<<(ostream& out, BasicLock& l) {
+  // Indexed by the LOCK_* state values.
+  static const char* const lock_state_names[] = {
+    "sync",
+    "prelock",
+    "lock",
+    "deleting",
+    "deleted",
+    "async",
+    "resync",
+    "resync2"
+  };
+  const int num_states = sizeof(lock_state_names) / sizeof(lock_state_names[0]);
+
+  // state is a plain char, so guard the table lookup against values outside
+  // the known range instead of reading past the array.
+  const int s = l.get_state();
+  out << "Lock(";
+  if (s >= 0 && s < num_states)
+    out << lock_state_names[s];
+  else
+    out << "unknown(" << s << ")";
+
+  // Relies on an operator<< for set<int> being declared by the including code.
+  if (!l.get_gather_set().empty()) out << " g=" << l.get_gather_set();
+
+  // rw?  (auth flags, then non-auth flags)
+  out << " ";
+  if (l.can_read(true)) out << "r";
+  if (l.can_write(true)) out << "w";
+  out << "/";
+  if (l.can_read(false)) out << "r";
+  if (l.can_write(false)) out << "w";
+
+  out << ")";
+  return out;
+}
+
+
+// -- async lock
+
+// BasicLock variant with different write rules (see can_write).
+// can_write() here hides, but does not override, BasicLock::can_write():
+// neither is virtual, so a call through a BasicLock& uses the base version.
+class AsyncLock : public BasicLock {
+ public:
+  AsyncLock() : BasicLock() {
+    assert(state == 0);
+  }
+
+  // auth:     writable in LOCK, ASYNC, RESYNC and RESYNC2.
+  // non-auth: writable only in ASYNC.
+  bool can_write(bool auth) const {
+    if (auth)
+      return (state == LOCK_LOCK)
+        || (state == LOCK_ASYNC) || (state == LOCK_RESYNC) || (state == LOCK_RESYNC2);
+    // Plain fall-through instead of a second `if (!auth)`, so every path
+    // through the function visibly returns a value.
+    return (state == LOCK_ASYNC);
+  }
+};
+
+
+#endif
}
show_imports();
-
}
/*
+LOCKS:
+
+ three states:
+
+ Auth Replica State
+ R R normal/sync fw writes to auth
+ RW - lock ping auth for R/W?
+ W W async (*) fw reads to auth
+
+ * only defined for soft inode metadata, right?
+
+ we also remember:
+ auth:
+ set<int> replicas
+ bool req_r, req_w
+
+ replica:
+ last_sync - stamp of last time we were sync
+
+
+
+
+
+
INODES:
- two types of inode metadata:
- hard - uid/gid, mode
- soft - m/c/atime, size
+= two types of inode metadata:
+ hard - uid/gid, mode
+ soft - m/ctime, size
+ ? atime - atime (*)
+
+ * if we want _strict_ atime behavior, atime can be folded into soft.
+ for lazy atime, should we just leave the atime lock in async state? XXX
+
+= correspondingly, two types of inode locks:
+ hardlock - hard metadata
+ softlock - soft metadata
+
+ -> These locks are completely orthogonal!
+
+= metadata ops and how they affect inode metadata:
+ scma=size ctime mtime atime
+ HARD SOFT OP
+ files:
+ R RRRR stat
+ RW chmod/chown
+ R wW touch ?ctime
+ R openr
+ W read atime
+ R openw
+ R w openwc ?ctime
+ W W write size mtime
+ close
+ dirs:
+ R W readdir atime
+ RRRR ( + implied stats on files)
+ R W W link/unlink/rename/rmdir
+ R WwW mkdir (ctime on new dir, size+mtime on parent dir)
- correspondingly, two types of locks:
+
+
+= relationship to client (writers):
+
+ - ops in question are
+ - stat ... need reasonable value for mtime (+ atime?)
+ - maybe we want a "quicksync" type operation instead of full lock
+ - truncate ... need to stop writers for the atomic truncate operation
+ - need a full lock
+
+
+
+
+
+
+ALSO:
+
+ dirlock - no dir changes (prior to unhashing)
+ denlock - dentry lock (prior to unlink, rename)
+
+
+
+
+
+OLD CRAP:
+ (old):
sync - soft metadata.. no reads/writes can proceed. (eg no stat)
lock - hard(+soft) metadata.. path traversals stop etc. (??)
MExportDirNotify *notify = new MExportDirNotify(dir->ino(), m->get_source(), mds->get_nodeid());
notify->copy_exports(m->get_exports());
+
if (g_conf.mds_verify_export_dirauth)
- notify->copy_subdirs(imported_subdirs); // copy subdir list (debug)
+ notify->copy_subdirs(imported_subdirs); // copy subdir list (DEBUG)
mds->messenger->send_message(notify,
MSG_ADDR_MDS( *it ), MDS_PORT_CACHE,
assert(dir->authority() != mds->get_nodeid());
assert(!dir->is_auth());
- // debug: verify subdirs
+ // DEBUG: verify subdirs
if (g_conf.mds_verify_export_dirauth) {
dout(7) << "handle_export_dir_notify on " << *dir << " checking " << m->num_subdirs() << " subdirs" << endl;