dirlock(this, CEPH_LOCK_IDIR, WAIT_DIRLOCK_OFFSET),
xattrlock(this, CEPH_LOCK_IXATTR, WAIT_XATTRLOCK_OFFSET),
snaplock(this, CEPH_LOCK_ISNAP, WAIT_SNAPLOCK_OFFSET),
- nestlock(this, CEPH_LOCK_INEST, WAIT_NESTLOCK_OFFSET)
+ nestlock(this, CEPH_LOCK_INEST, WAIT_NESTLOCK_OFFSET),
+ loner_cap(-1)
{
memset(&inode, 0, sizeof(inode));
state = 0;
// -- caps -- (new)
// client caps
- int count_nonstale_caps() {
+ int loner_cap;
+
+ // Try to designate a single "loner" client for this inode.
+ //
+ // Succeeds only when mds_caps_wanted is empty and exactly one non-stale
+ // entry in client_caps wants CEPH_CAP_RD or CEPH_CAP_WR.  On success the
+ // key of that entry is stored in loner_cap and true is returned; otherwise
+ // loner_cap is left untouched and false is returned.
+ // Precondition (asserted): no loner is currently set (loner_cap < 0).
+ bool choose_loner() {
+ assert(loner_cap < 0);
+
+ if (!mds_caps_wanted.empty())
+ return false;
+
int n = 0;
+ int loner = -1;  // -1 == no candidate yet; never read uninitialized
for (map<int,Capability*>::iterator it = client_caps.begin();
it != client_caps.end();
it++)
- if (!it->second->is_stale()) {
- if (n) return false;
+ if (!it->second->is_stale() &&
+ (it->second->wanted() & (CEPH_CAP_WR|CEPH_CAP_RD))) {
+ if (n)
+ return false;  // second candidate found: there is no single loner
n++;
+ loner = it->first;
}
+ if (n == 1) {
+ loner_cap = loner;
+ return true;
+ }
+ return false;
+ }
+
+ int count_nonstale_caps() {  // number of entries in client_caps whose Capability reports !is_stale()
+ int n = 0;
+ for (map<int,Capability*>::iterator it = client_caps.begin();
+ it != client_caps.end();
+ it++)
+ if (!it->second->is_stale())
+ n++;
return n;
}
+ int get_loner() {  // current loner_cap; the constructor starts it at -1 and choose_loner() asserts it is negative before picking one
+ return loner_cap;
+ }
bool is_any_caps() { return !client_caps.empty(); }
bool is_any_nonstale_caps() { return count_nonstale_caps(); }
- bool is_loner_cap() {
- if (!mds_caps_wanted.empty())
- return false;
- return count_nonstale_caps() == 1;
- }
map<int,Capability*>& get_client_caps() { return client_caps; }
Capability *get_client_cap(int client) {
}
// caps issued, wanted
- int get_caps_issued() {
+ int get_caps_issued(int *ploner = 0, int *pother = 0) {  // returns the OR of issued() over every entry in client_caps; the optional out-params split that mask into loner vs. everyone else
int c = 0;
+ int loner = 0, other = 0;
+ if (!is_auth())  // NOTE(review): this getter writes loner_cap when !is_auth(), so calling it has a side effect -- confirm that is intended
+ loner_cap = -1;
for (map<int,Capability*>::iterator it = client_caps.begin();
it != client_caps.end();
- it++)
- c |= it->second->issued();
+ it++) {
+ int i = it->second->issued();
+ c |= i;
+ if (it->first == loner_cap)  // the entry keyed by loner_cap is the loner; all other entries are "other"
+ loner |= i;
+ else
+ other |= i;
+ }
+ if (ploner) *ploner = loner;  // out-params are optional; the default of 0 means "do not report"
+ if (pother) *pother = other;
return c;
}
- int get_caps_wanted() {
+ int get_caps_wanted(int *ploner = 0, int *pother = 0) {
int w = 0;
+ int loner = 0, other = 0;
for (map<int,Capability*>::iterator it = client_caps.begin();
it != client_caps.end();
it++) {
- if (!it->second->is_stale())
- w |= it->second->wanted();
+ if (!it->second->is_stale()) {
+ int t = it->second->wanted();
+ w |= t;
+ if (it->first == loner_cap)
+ loner |= t;
+ else
+ other |= t;
+ }
//cout << " get_caps_wanted client " << it->first << " " << cap_string(it->second.wanted()) << endl;
}
if (is_auth())
it != mds_caps_wanted.end();
it++) {
w |= it->second;
+ other |= it->second;
//cout << " get_caps_wanted mds " << it->first << " " << cap_string(it->second) << endl;
}
+ if (ploner) *ploner = loner;
+ if (pother) *pother = other;
return w;
}
// -----auth-------- ---replica-------
#define LOCK_SYNC_ 1 // AR R . / C R . . . L R . / C R . . . L stat()
-#define LOCK_GSYNCL -12 // A . . / C ? . . . L loner -> sync (*)
+#define LOCK_GSYNCL -12 // A . . / C r . . . L * loner -> sync
#define LOCK_GSYNCM -13 // A . . / . R . . . L
#define LOCK_LOCK_ 2 // AR R W / C . . . B . . . / C . . . . . truncate()
#define LOCK_MIXED 6 // AR . . / . R W A . L . . / . R . . . L
#define LOCK_GMIXEDR -7 // AR R . / . R . . . L . . / . R . . . L
-#define LOCK_GMIXEDL -8 // A . . / . . . . . L loner -> mixed
+#define LOCK_GMIXEDL -8 // A . . / . r w a . L * loner -> mixed
-#define LOCK_LONER 9 // A . . / C R W A B L (lock)
-#define LOCK_GLONERR -10 // A . . / . R . . . L
-#define LOCK_GLONERM -11 // A . . / . R W A . L
-
-// (*) FIXME: how to let old loner keep R, somehow, during GSYNCL
-
-// 4 stable
-// +9 transition
-// 13 total
+#define LOCK_LONER 9 // A . . / c r w a b L * (lock)
+#define LOCK_GLONERR -10 // A . . / . R . . . L
+#define LOCK_GLONERM -11 // A . . / . R W A . L
+#define LOCK_GLONERL -15 // A . . / c . . . b . *
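+// * = allowed caps differ for the loner client vs. everyone else: lowercase letters are granted to the loner only, uppercase letters to all clients.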
+
inline const char *get_filelock_state_name(int n) {
switch (n) {
case LOCK_SYNC: return "sync";
case LOCK_LONER: return "loner";
case LOCK_GLONERR: return "glonerr";
case LOCK_GLONERM: return "glonerm";
+ case LOCK_GLONERL: return "glonerl";
default: assert(0); return 0;
}
}
case LOCK_LONER:
case LOCK_GLONERR:
case LOCK_GLONERM:
+ case LOCK_GLONERL:
return LOCK_LOCK;
case LOCK_MIXED:
case LOCK_GMIXEDR:
parent->is_auth() &&
(state == LOCK_LOCK || state == LOCK_GLOCKM || state == LOCK_GLOCKL ||
state == LOCK_MIXED || state == LOCK_GMIXEDL ||
- state == LOCK_LONER || state == LOCK_GLONERM ||
+ state == LOCK_LONER || state == LOCK_GLONERM || state == LOCK_GLONERL ||
state == LOCK_GSYNCM || state == LOCK_GSYNCL);
}
void get_wrlock(bool force) {
return CEPH_CAP_PIN |
CEPH_CAP_RDCACHE | CEPH_CAP_RD | CEPH_CAP_LAZYIO;
}
- int caps_allowed() {
+ int caps_allowed(bool loner) {
if (parent->is_auth())
switch (state) {
case LOCK_SYNC:
case LOCK_GMIXEDR:
return CEPH_CAP_PIN | CEPH_CAP_RD | CEPH_CAP_LAZYIO;
case LOCK_GMIXEDL:
- return CEPH_CAP_PIN;
+ return CEPH_CAP_PIN | (loner ? (CEPH_CAP_RD | CEPH_CAP_WR | CEPH_CAP_WREXTEND) : 0);
case LOCK_LONER: // single client writer, of course.
- return CEPH_CAP_PIN | CEPH_CAP_RDCACHE | CEPH_CAP_RD | CEPH_CAP_WR | CEPH_CAP_WREXTEND | CEPH_CAP_WRBUFFER | CEPH_CAP_LAZYIO | CEPH_CAP_EXCL;
+ return CEPH_CAP_PIN | CEPH_CAP_LAZYIO |
+ ( loner ? (CEPH_CAP_RDCACHE | CEPH_CAP_RD | CEPH_CAP_WR | CEPH_CAP_WREXTEND | CEPH_CAP_WRBUFFER | CEPH_CAP_EXCL) : 0 );
case LOCK_GLONERR:
return CEPH_CAP_PIN | CEPH_CAP_RD | CEPH_CAP_LAZYIO;
case LOCK_GLONERM:
return CEPH_CAP_PIN | CEPH_CAP_RD | CEPH_CAP_WR | CEPH_CAP_WREXTEND | CEPH_CAP_LAZYIO;
+ case LOCK_GLONERL:
+ return CEPH_CAP_PIN | (loner ? (CEPH_CAP_RDCACHE | CEPH_CAP_WRBUFFER) : 0);
case LOCK_GSYNCL:
- return CEPH_CAP_PIN | CEPH_CAP_RDCACHE | CEPH_CAP_LAZYIO;
+ return CEPH_CAP_PIN | CEPH_CAP_RDCACHE | (loner ? CEPH_CAP_RD:0) | CEPH_CAP_LAZYIO;
case LOCK_GSYNCM:
return CEPH_CAP_PIN | CEPH_CAP_RD | CEPH_CAP_LAZYIO;
}
return 0;
}
+ // Reports whether the lock is currently in one of the "loner" modes, i.e. a
+ // state in which the loner client is distinguished from everyone else.
+ bool is_loner_mode() {
+ switch (state) {
+ case LOCK_GSYNCL:
+ case LOCK_GLOCKL:
+ case LOCK_GMIXEDL:
+ case LOCK_LONER:
+ case LOCK_GLONERL:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+
void print(ostream& out) {
out << "(";
out << get_lock_type_name(get_type()) << " ";
bool Locker::issue_caps(CInode *in)
{
// allowed caps are determined by the lock mode.
- int all_allowed = in->filelock.caps_allowed();
- dout(7) << "issue_caps filelock allows=" << cap_string(all_allowed)
- << " on " << *in << dendl;
+ int all_allowed = in->filelock.caps_allowed(false);
+ // loner mode? if so, we restrict the loner-only caps to a single loner client
+ bool loner_mode = in->filelock.is_loner_mode();
+ int loner_allowed;
+ if (loner_mode)
+ loner_allowed = in->filelock.caps_allowed(true);
+ else
+ loner_allowed = all_allowed;
+
+ int loner = -1;
+ if (loner_mode) {
+ loner = in->get_loner();
+ dout(7) << "issue_caps filelock loner client" << loner
+ << " allowed=" << cap_string(loner_allowed)
+ << ", others allowed=" << cap_string(all_allowed)
+ << " on " << *in << dendl;
+ } else {
+ dout(7) << "issue_caps filelock allowed=" << cap_string(all_allowed)
+ << " on " << *in << dendl;
+ }
+
// count conflicts with
int nissued = 0;
continue;
// do not issue _new_ bits when size|mtime is projected
- int allowed = all_allowed;
+ int allowed;
+ if (loner_mode && loner == it->first)
+ allowed = loner_allowed;
+ else
+ allowed = all_allowed;
+
int careful = CEPH_CAP_EXCL|CEPH_CAP_WRBUFFER|CEPH_CAP_RDCACHE;
int pending = cap->pending();
if (sizemtime_is_projected)
allowed &= ~careful | pending; // only allow "careful" bits if already issued
- dout(20) << " all_allowed " << cap_string(all_allowed)
+
+ dout(20) << " client" << it->first
<< " pending " << cap_string(pending)
<< " allowed " << cap_string(allowed)
<< " wanted " << cap_string(cap->wanted())
void Locker::file_eval_gather(FileLock *lock)
{
CInode *in = (CInode*)lock->get_parent();
- int issued = in->get_caps_issued();
+
+ int loner_allowed = lock->caps_allowed(true);
+ int other_allowed = lock->caps_allowed(false);
+
+ int loner_issued, other_issued;
+ in->get_caps_issued(&loner_issued, &other_issued);
- dout(7) << "file_eval_gather issued " << cap_string(issued)
- << " vs " << cap_string(lock->caps_allowed())
- << " on " << *lock << " on " << *lock->get_parent()
- << dendl;
+ dout(7) << "file_eval_gather issued "
+ << cap_string(loner_issued) << "/" << cap_string(other_issued) << " vs "
+ << cap_string(loner_allowed) << "/" << cap_string(other_allowed)
+ << " on " << *lock << " on " << *lock->get_parent() << dendl;
if (lock->is_stable())
return; // nothing for us to do here!
!lock->is_gathering() &&
!lock->is_wrlocked() &&
lock->get_num_client_lease() == 0 &&
- ((issued & ~lock->caps_allowed()) == 0)) {
+ ((loner_issued & ~loner_allowed) == 0) &&
+ ((other_issued & ~other_allowed) == 0)) {
if (in->state_test(CInode::STATE_NEEDSRECOVER)) {
dout(7) << "file_eval_gather finished gather, but need to recover" << dendl;
case LOCK_GLOCKM:
case LOCK_GLOCKL:
lock->set_state(LOCK_LOCK);
-
+ in->loner_cap = -1;
+
// waiters
lock->get_rdlock();
lock->finish_waiters(SimpleLock::WAIT_STABLE|SimpleLock::WAIT_WR|SimpleLock::WAIT_RD);
case LOCK_GMIXEDL:
lock->set_state(LOCK_MIXED);
+ in->loner_cap = -1;
if (in->is_replicated()) {
// data
// to loner
case LOCK_GLONERR:
- lock->set_state(LOCK_LONER);
- lock->finish_waiters(SimpleLock::WAIT_STABLE);
- lock->get_parent()->auth_unpin(lock);
- break;
-
case LOCK_GLONERM:
+ case LOCK_GLONERL:
lock->set_state(LOCK_LONER);
lock->finish_waiters(SimpleLock::WAIT_STABLE);
lock->get_parent()->auth_unpin(lock);
case LOCK_GSYNCL:
case LOCK_GSYNCM:
lock->set_state(LOCK_SYNC);
+ in->loner_cap = -1;
{ // bcast data to replicas
bufferlist softdata;
// [replica] finished caps gather?
if (!in->is_auth() &&
lock->get_num_client_lease() == 0 &&
- ((issued & ~lock->caps_allowed()) == 0)) {
+ ((other_issued & ~other_allowed)) == 0) {
switch (lock->get_state()) {
case LOCK_GMIXEDR:
{
void Locker::file_eval(FileLock *lock)
{
CInode *in = (CInode*)lock->get_parent();
- int wanted = in->get_caps_wanted();
- bool loner = in->is_loner_cap();
+ int loner_wanted, other_wanted;  // file_eval: on the auth, pick the next file lock transition (loner / mixed / sync) from the wanted caps, or else just re-issue caps
+ int wanted = in->get_caps_wanted(&loner_wanted, &other_wanted);  // wanted = combined mask; loner_wanted/other_wanted split it by whether the client is the current loner
dout(7) << "file_eval wanted=" << cap_string(wanted)
<< " filelock=" << *lock << " on " << *lock->get_parent()
- << " loner=" << loner
+ << " loner " << in->get_loner()
<< dendl;
assert(lock->get_parent()->is_auth());
if (lock->is_xlocked() ||
lock->is_wrlocked() ||
lock->get_parent()->is_frozen()) return;
+
+ if (lock->get_state() == LOCK_LONER) {  // already in loner state: only decide whether the loner must be dropped
+ // lose loner?
+ int loner_issued, other_issued;  // NOTE(review): these two values are fetched but never read in this branch -- confirm whether the call is still needed
+ in->get_caps_issued(&loner_issued, &other_issued);
+
+ if (in->get_loner() >= 0) {
+ if ((loner_wanted & (CEPH_CAP_WR|CEPH_CAP_RD)) == 0 ||  // drop the loner when it no longer wants RD/WR ...
+ (other_wanted & (CEPH_CAP_WR|CEPH_CAP_RD))) {  // ... or when anyone else now wants RD/WR
+ // we should lose it.
+ if ((other_wanted & CEPH_CAP_WR) ||
+ lock->is_waiter_for(SimpleLock::WAIT_WR) ||
+ lock->is_wrlocked())  // NOTE(review): is_wrlocked() already caused an early return above, so this term looks always false here
+ file_mixed(lock);
+ else
+ file_sync(lock);
+ }
+ }
+ }
// * -> loner?
- if (!lock->is_rdlocked() &&
- !lock->is_waiter_for(SimpleLock::WAIT_WR) &&
- (wanted & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER)) &&
- loner &&
- lock->get_state() != LOCK_LONER) {
- dout(7) << "file_eval stable, bump to loner " << *lock << " on " << *lock->get_parent() << dendl;
+ else if (lock->get_state() != LOCK_LONER &&  // NOTE(review): this chain hangs off the LOCK_LONER `if` above, so while in LOCK_LONER none of the branches below (including the final issue_caps) runs -- confirm intended
+ !lock->is_rdlocked() &&
+ !lock->is_waiter_for(SimpleLock::WAIT_WR) &&
+ (wanted & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER)) &&
+ in->choose_loner()) {  // evaluated last: choose_loner() sets the inode's loner_cap when it returns true
+ dout(7) << "file_eval stable, bump to loner " << *lock
+ << " on " << *lock->get_parent() << dendl;
file_loner(lock);
}
// * -> mixed?
- else if ((!lock->is_rdlocked() &&
- !lock->is_waiter_for(SimpleLock::WAIT_WR) &&
- (wanted & CEPH_CAP_RD) &&
- (wanted & CEPH_CAP_WR) &&
- !(loner && lock->get_state() == LOCK_LONER) &&
- lock->get_state() != LOCK_MIXED) ||
- (!loner && in->is_any_nonstale_caps() && lock->get_state() == LOCK_LONER)) {
- dout(7) << "file_eval stable, bump to mixed " << *lock << " on " << *lock->get_parent() << dendl;
+ else if (lock->get_state() != LOCK_MIXED &&
+ !lock->is_rdlocked() &&
+ !lock->is_waiter_for(SimpleLock::WAIT_WR) &&
+ (wanted & CEPH_CAP_RD) &&  // mixed requires both RD and WR to be wanted
+ (wanted & CEPH_CAP_WR)) {
+ dout(7) << "file_eval stable, bump to mixed " << *lock
+ << " on " << *lock->get_parent() << dendl;
file_mixed(lock);
}
// * -> sync?
- else if (!in->filelock.is_waiter_for(SimpleLock::WAIT_WR) &&
- !(wanted & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER)) &&
+ else if (lock->get_state() != LOCK_SYNC &&
+ !in->filelock.is_waiter_for(SimpleLock::WAIT_WR) &&
+ !(wanted & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER))  // sync only when nobody wants WR or WRBUFFER
//((wanted & CEPH_CAP_RD) ||
- //in->is_replicated() ||
- //lock->get_num_client_lease() ||
+ //in->is_replicated() ||
+ //lock->get_num_client_lease() ||
//(!loner && lock->get_state() == LOCK_LONER)) &&
- !(loner && lock->get_state() == LOCK_LONER) && // leave loner in loner state
- lock->get_state() != LOCK_SYNC) {
- dout(7) << "file_eval stable, bump to sync " << *lock << " on " << *lock->get_parent() << dendl;
+ ) {
+ dout(7) << "file_eval stable, bump to sync " << *lock
+ << " on " << *lock->get_parent() << dendl;
file_sync(lock);
}
- // * -> lock? (if not replicated or open)
-/*
- else if (!in->is_replicated() &&
- wanted == 0 &&
- lock->get_num_client_lease() == 0 &&
- lock->get_state() != LOCK_LOCK) {
- file_lock(lock);
- }
-*/
-
else
issue_caps(in);
}
}
int gather = 0;
- int issued = in->get_caps_issued();
- if (issued & ~lock->caps_allowed()) {
+ int loner_issued, other_issued;
+ in->get_caps_issued(&loner_issued, &other_issued);
+ if ((loner_issued & ~lock->caps_allowed(true)) ||
+ (other_issued & ~lock->caps_allowed(false))) {
issue_caps(in);
gather++;
}
}
lock->set_state(LOCK_SYNC);
+ in->loner_cap = -1;
issue_caps(in);
return true;
}
revoke_client_leases(lock);
gather++;
}
- int issued = in->get_caps_issued();
- if (issued & ~lock->caps_allowed()) {
+ int loner_issued, other_issued;
+ in->get_caps_issued(&loner_issued, &other_issued);
+ if ((loner_issued & ~lock->caps_allowed(true)) ||
+ (other_issued & ~lock->caps_allowed(false))) {
issue_caps(in);
gather++;
}
if (gather)
lock->get_parent()->auth_pin(lock);
- else
+ else {
lock->set_state(LOCK_LOCK);
+ in->loner_cap = -1;
+ }
}
revoke_client_leases(lock);
gather++;
}
- int issued = in->get_caps_issued();
- if (issued & ~lock->caps_allowed()) {
+ int loner_issued, other_issued;
+ in->get_caps_issued(&loner_issued, &other_issued);
+ if ((loner_issued & ~lock->caps_allowed(true)) ||
+ (other_issued & ~lock->caps_allowed(false))) {
issue_caps(in);
gather++;
}
lock->get_parent()->auth_pin(lock);
else {
lock->set_state(LOCK_MIXED);
+ in->loner_cap = -1;
issue_caps(in);
}
}
assert(in->is_auth());
assert(lock->is_stable());
- assert(in->count_nonstale_caps() == 1 && in->mds_caps_wanted.empty());
+ assert(in->get_loner() >= 0 && in->mds_caps_wanted.empty());
- if (lock->get_state() != LOCK_LOCK) { // LONER replicas are LOCK
- switch (lock->get_state()) {
- case LOCK_SYNC: lock->set_state(LOCK_GLONERR); break;
- case LOCK_MIXED: lock->set_state(LOCK_GLONERM); break;
- default: assert(0);
- }
- int gather = 0;
-
- if (in->is_replicated()) {
- send_lock_message(lock, LOCK_AC_LOCK);
- lock->init_gather();
- gather++;
- }
- if (lock->get_num_client_lease()) {
- revoke_client_leases(lock);
- gather++;
- }
- if (in->state_test(CInode::STATE_NEEDSRECOVER)) {
- mds->mdcache->queue_file_recover(in);
- mds->mdcache->do_file_recover();
- gather++;
- }
-
- if (gather) {
- lock->get_parent()->auth_pin(lock);
- return;
- }
+ switch (lock->get_state()) {
+ case LOCK_SYNC: lock->set_state(LOCK_GLONERR); break;
+ case LOCK_MIXED: lock->set_state(LOCK_GLONERM); break;
+ case LOCK_LOCK: lock->set_state(LOCK_GLONERL); break;
+ default: assert(0);
+ }
+ int gather = 0;
+
+ if (in->is_replicated()) {
+ send_lock_message(lock, LOCK_AC_LOCK);
+ lock->init_gather();
+ gather++;
+ }
+ if (lock->get_num_client_lease()) {
+ revoke_client_leases(lock);
+ gather++;
+ }
+ int loner_issued, other_issued;
+ in->get_caps_issued(&loner_issued, &other_issued);
+ dout(10) << " issued loner " << cap_string(loner_issued) << " other " << cap_string(other_issued) << dendl;
+ if ((loner_issued & ~lock->caps_allowed(true)) ||
+ (other_issued & ~lock->caps_allowed(false))) {
+ issue_caps(in);
+ gather++;
+ }
+ if (in->state_test(CInode::STATE_NEEDSRECOVER)) {
+ mds->mdcache->queue_file_recover(in);
+ mds->mdcache->do_file_recover();
+ gather++;
+ }
+
+ if (gather) {
+ lock->get_parent()->auth_pin(lock);
+ } else {
+ lock->set_state(LOCK_LONER);
+ issue_caps(in);
}
-
- lock->set_state(LOCK_LONER);
- issue_caps(in);
}
dout(7) << "handle_file_lock a=" << m->get_action() << " from " << from << " "
<< *in << " filelock=" << *lock << dendl;
- int issued = in->get_caps_issued();
-
switch (m->get_action()) {
// -- replica --
case LOCK_AC_SYNC:
lock->set_state(LOCK_GLOCKR);
// call back caps?
- if (issued & CEPH_CAP_RD) {
+ int loner_issued, other_issued;
+ in->get_caps_issued(&loner_issued, &other_issued);
+ if ((loner_issued & ~lock->caps_allowed(true)) ||
+ (other_issued & ~lock->caps_allowed(false))) {
dout(7) << "handle_file_lock client readers, gathering caps on " << *in << dendl;
issue_caps(in);
break;
if (lock->get_state() == LOCK_SYNC) {
// MIXED
- if (issued & CEPH_CAP_RD) {
+ lock->set_state(LOCK_GMIXEDR);
+ int loner_issued, other_issued;
+ in->get_caps_issued(&loner_issued, &other_issued);
+ if ((loner_issued & ~lock->caps_allowed(true)) ||
+ (other_issued & ~lock->caps_allowed(false))) {
// call back client caps
- lock->set_state(LOCK_GMIXEDR);
issue_caps(in);
break;
- } else {
- // no clients, go straight to mixed
- lock->set_state(LOCK_MIXED);
-
- // ack
- MLock *reply = new MLock(lock, LOCK_AC_MIXEDACK, mds->get_nodeid());
- mds->send_message_mds(reply, from);
}
+
+ lock->set_state(LOCK_MIXED);
+
+ // ack
+ MLock *reply = new MLock(lock, LOCK_AC_MIXEDACK, mds->get_nodeid());
+ mds->send_message_mds(reply, from);
} else {
// LOCK
lock->set_state(LOCK_MIXED);