pair<int,int> CInode::authority()
{
- if (is_root())
- return CDIR_AUTH_ROOTINODE; // root _inode_ is locked to mds0.
+ //if (is_root())
+ //return CDIR_AUTH_ROOTINODE; // root _inode_ is locked to mds0.
+ if (force_auth >= 0)
+ return pair<int,int>(force_auth, -2);
if (parent)
return parent->dir->authority();
#include "SimpleLock.h"
#include "FileLock.h"
#include "ScatterLock.h"
+#include "LocalLock.h"
#include "Capability.h"
static const int WAIT_DIRFRAGTREELOCK_OFFSET = 5 + 2*SimpleLock::WAIT_BITS;
static const int WAIT_FILELOCK_OFFSET = 5 + 3*SimpleLock::WAIT_BITS;
static const int WAIT_DIRLOCK_OFFSET = 5 + 4*SimpleLock::WAIT_BITS;
+ static const int WAIT_VERSIONLOCK_OFFSET = 5 + 5*SimpleLock::WAIT_BITS;
static const int WAIT_ANY = 0xffffffff;
CDentry *parent; // primary link
set<CDentry*> remote_parents; // if hard linked
+ int force_auth;
// -- distributed state --
protected:
CInode(MDCache *c, bool auth=true) :
mdcache(c),
last_open_journaled(0),
- parent(0),
+ parent(0), force_auth(-1),
replica_caps_wanted(0),
auth_pins(0), nested_auth_pins(0),
+ versionlock(this, LOCK_OTYPE_IVERSION, WAIT_VERSIONLOCK_OFFSET),
authlock(this, LOCK_OTYPE_IAUTH, WAIT_AUTHLOCK_OFFSET),
linklock(this, LOCK_OTYPE_ILINK, WAIT_LINKLOCK_OFFSET),
dirfragtreelock(this, LOCK_OTYPE_IDIRFRAGTREE, WAIT_DIRFRAGTREELOCK_OFFSET),
// -- locks --
public:
+ LocalLock versionlock;
SimpleLock authlock;
SimpleLock linklock;
SimpleLock dirfragtreelock;
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#ifndef __LOCALLOCK_H
+#define __LOCALLOCK_H
+
+#include "SimpleLock.h"
+
+/**
+ * A SimpleLock variant that is forced into LOCK_LOCK at construction and
+ * additionally keeps a count of outstanding write locks (wrlocks).
+ *
+ * Any number of wrlocks may be held at once; a wrlock can be taken whenever
+ * the lock is not xlocked.
+ */
+class LocalLock : public SimpleLock {
+protected:
+  int num_wrlock;   // number of wrlocks currently held
+
+public:
+  // o, t and wo are passed straight through to the SimpleLock constructor.
+  LocalLock(MDSCacheObject *o, int t, int wo) : 
+    SimpleLock(o, t, wo),
+    num_wrlock(0) {
+    set_state(LOCK_LOCK); // always.
+  }
+
+  // A wrlock may be taken whenever nobody holds the xlock.
+  bool can_wrlock() {
+    return !is_xlocked();
+  }
+  // Take one wrlock; the caller is expected to have checked can_wrlock().
+  void get_wrlock() {
+    assert(can_wrlock());
+    ++num_wrlock;
+  }
+  // Release one wrlock previously taken with get_wrlock().
+  void put_wrlock() {
+    assert(num_wrlock > 0);  // catch an unbalanced put before the count goes negative
+    --num_wrlock;
+  }
+  bool is_wrlocked() { return num_wrlock > 0; }
+  int get_num_wrlocks() { return num_wrlock; }
+
+
+  // Write "(<lock type name> [ x=<xlock holder>][ wr=<wrlock count>])" to out.
+  void print(ostream& out) {
+    out << "(";
+    out << get_lock_type_name(get_type()) << " ";
+    if (is_xlocked())
+      out << " x=" << get_xlocked_by();
+    if (is_wrlocked())
+      out << " wr=" << get_num_wrlocks();
+    out << ")";
+  }
+
+};
+
+
+#endif
}
dout(10) << "acquire_locks " << *mdr << endl;
- // sort everything we will lock
- set<SimpleLock*, SimpleLock::ptr_lt> sorted;
+ set<SimpleLock*, SimpleLock::ptr_lt> sorted; // sort everything we will lock
+ set<SimpleLock*> mustpin = xlocks; // items to authpin
- // (local) AUTH PINS
+ // xlocks
+ for (set<SimpleLock*>::iterator p = xlocks.begin(); p != xlocks.end(); ++p) {
+ dout(20) << " must xlock " << **p << " " << *(*p)->get_parent() << endl;
+ sorted.insert(*p);
- // make list of items to authpin
- set<SimpleLock*> mustpin = xlocks;
+ // augment xlock with a versionlock?
+ if ((*p)->get_type() > LOCK_OTYPE_IVERSION) {
+ // inode version lock?
+ CInode *in = (CInode*)(*p)->get_parent();
+ if (mdr->is_master()) {
+ // master. wrlock versionlock so we can pipeline inode updates to journal.
+ wrlocks.insert(&in->versionlock);
+ } else {
+ // slave. exclusively lock the inode version (i.e. block other journal updates)
+ xlocks.insert(&in->versionlock);
+ sorted.insert(&in->versionlock);
+ }
+ }
+ }
+ // wrlocks
for (set<SimpleLock*>::iterator p = wrlocks.begin(); p != wrlocks.end(); ++p) {
- if ((*p)->get_parent()->is_auth())
+ dout(20) << " must wrlock " << **p << " " << *(*p)->get_parent() << endl;
+ sorted.insert(*p);
+ if ((*p)->get_parent()->is_auth())
mustpin.insert(*p);
- else
- sorted.insert(*p);
}
-
+
+ // rdlocks
+ for (set<SimpleLock*>::iterator p = rdlocks.begin();
+ p != rdlocks.end();
+ ++p) {
+ dout(20) << " must rdlock " << **p << " " << *(*p)->get_parent() << endl;
+ sorted.insert(*p);
+ }
+
+
+ // AUTH PINS
map<int, set<MDSCacheObject*> > mustpin_remote; // mds -> (object set)
// can i auth pin them all now?
++p) {
MDSCacheObject *object = (*p)->get_parent();
- sorted.insert(*p); // sort in
-
dout(10) << " must authpin " << *object << endl;
if (mdr->is_auth_pinned(object))
return false;
}
- // sort in rdlocks too
- for (set<SimpleLock*>::iterator p = rdlocks.begin();
- p != rdlocks.end();
- ++p) {
- dout(20) << " must rdlock " << **p << " " << *(*p)->get_parent() << endl;
- sorted.insert(*p);
- }
-
// acquire locks.
// make sure they match currently acquired locks.
set<SimpleLock*, SimpleLock::ptr_lt>::iterator existing = mdr->locks.begin();
root->inode.nlink = 1;
root->inode.layout = g_OSD_MDDirLayout;
+ root->force_auth = mds->get_nodeid();
+
set_root( root );
add_inode( root );
// add in root
cur = add_replica_inode(m->get_inode(0), NULL);
+ cur->force_auth = m->get_source().num();
set_root(cur);
dout(7) << "discover_reply got root " << *cur << endl;
// add
cur = add_replica_inode(m->get_inode(0), NULL);
- set_root(cur);
+ cur->force_auth = m->get_source().num();
+
dout(7) << "discover_reply got stray " << *cur << endl;
// take waiters
// NOTE: this also defines the lock ordering!
#define LOCK_OTYPE_DN 1
-#define LOCK_OTYPE_IFILE 2
-#define LOCK_OTYPE_IAUTH 3
-#define LOCK_OTYPE_ILINK 4
-#define LOCK_OTYPE_IDIRFRAGTREE 5
-#define LOCK_OTYPE_IDIR 6
+#define LOCK_OTYPE_IVERSION 2
+#define LOCK_OTYPE_IFILE 3
+#define LOCK_OTYPE_IAUTH 4
+#define LOCK_OTYPE_ILINK 5
+#define LOCK_OTYPE_IDIRFRAGTREE 6
+#define LOCK_OTYPE_IDIR 7
//#define LOCK_OTYPE_DIR 7 // not used
inline const char *get_lock_type_name(int t) {
switch (t) {
case LOCK_OTYPE_DN: return "dn";
+ case LOCK_OTYPE_IVERSION: return "iversion";
case LOCK_OTYPE_IFILE: return "ifile";
case LOCK_OTYPE_IAUTH: return "iauth";
case LOCK_OTYPE_ILINK: return "ilink";
struct ptr_lt {
bool operator()(const SimpleLock* l, const SimpleLock* r) const {
- if (l->type < r->type) return true;
- if (l->type == r->type) return l->parent->is_lt(r->parent);
+ // first sort by object type (dn < inode): (type > LOCK_OTYPE_DN) is false for a dentry lock and true for every inode lock type
+ if ((l->type>LOCK_OTYPE_DN) < (r->type>LOCK_OTYPE_DN)) return true;
+ if ((l->type>LOCK_OTYPE_DN) == (r->type>LOCK_OTYPE_DN)) {
+ // then sort by object (ordering of parents is delegated to is_lt())
+ if (l->parent->is_lt(r->parent)) return true;
+ if (l->parent == r->parent) {
+ // then sort by (inode) lock type, i.e. by the numeric LOCK_OTYPE_* value
+ if (l->type < r->type) return true;
+ }
+ }
return false;
}
};
}
bool can_rdlock(MDRequest *mdr) {
- if (state == LOCK_LOCK && mdr && xlock_by == mdr) return true; // xlocked by me. (actually, is this right?)
- if (state == LOCK_LOCK && !xlock_by && parent->is_auth()) return true;
+ //if (state == LOCK_LOCK && mdr && xlock_by == mdr) return true; // xlocked by me. (actually, is this right?)
+ //if (state == LOCK_LOCK && !xlock_by && parent->is_auth()) return true; // (both LOCK_LOCK shortcuts are disabled: an rdlock is now grantable only in LOCK_SYNC, and mdr is no longer consulted)
return (state == LOCK_SYNC);
}
bool can_xlock(MDRequest *mdr) {
- if (mdr && xlock_by == mdr) return true; // auth or replica! xlocked by me.
+ if (mdr && xlock_by == mdr) { // this same request already holds the xlock
+ assert(state == LOCK_LOCK); // sanity check: a held xlock is expected to coincide with LOCK_LOCK
+ return true; // auth or replica! xlocked by me.
+ }
if (state == LOCK_LOCK && parent->is_auth() && !xlock_by) return true;
return false;
}
#define CDIR_AUTH_UNKNOWN -2
#define CDIR_AUTH_DEFAULT pair<int,int>(-1, -2)
#define CDIR_AUTH_UNDEF pair<int,int>(-2, -2)
-#define CDIR_AUTH_ROOTINODE pair<int,int>( 0, -2)
+//#define CDIR_AUTH_ROOTINODE pair<int,int>( 0, -2)