in->dirfragtree = st->dirfragtree; // FIXME look at the mask!
in->xattrs.swap(st->xattrs);
in->inode.dirstat = st->dirstat;
+ in->inode.rstat = st->rstat;
in->inode.ctime = st->ctime;
in->inode.max_size = st->max_size; // right?
}
-int Client::fill_stat(Inode *in, struct stat *st, frag_info_t *dirstat)
+int Client::fill_stat(Inode *in, struct stat *st, frag_info_t *dirstat, nest_info_t *rstat)
{
dout(10) << "fill_stat on " << in->inode.ino << " snap/dev" << in->snapid
<< " mode 0" << oct << in->inode.mode << dec
st->st_mtime = in->inode.mtime;
if (in->inode.is_dir()) {
//st->st_size = in->inode.dirstat.size();
- st->st_size = in->inode.dirstat.rbytes;
+ st->st_size = in->inode.rstat.rbytes;
st->st_blocks = 1;
} else {
st->st_size = in->inode.size;
if (dirstat)
*dirstat = in->inode.dirstat;
+ if (rstat)
+ *rstat = in->inode.rstat;
return in->lease_mask;
}
// find dentry based on filepath
Dentry *lookup(const filepath& path, snapid_t snap=CEPH_NOSNAP);
- int fill_stat(Inode *in, struct stat *st, frag_info_t *dirstat=0);
+ int fill_stat(Inode *in, struct stat *st, frag_info_t *dirstat=0, nest_info_t *rstat=0);
// trace generation
#define CEPH_LOCK_ILINK 16
#define CEPH_LOCK_IDFT 32 /* dir frag tree */
#define CEPH_LOCK_IDIR 64 /* mds internal */
-#define CEPH_LOCK_IXATTR 128
-#define CEPH_LOCK_ISNAP 256
+#define CEPH_LOCK_INEST 128 /* mds internal */
+#define CEPH_LOCK_IXATTR 256
+#define CEPH_LOCK_ISNAP 512
#define CEPH_LOCK_INO 2048 /* immutable inode bits; not actually a lock */
#define CEPH_LOCK_ICONTENT (CEPH_LOCK_IFILE|CEPH_LOCK_IDIR) /* alias for either filelock or dirlock */
{
return (a.sec() < b.sec()) || (a.sec() == b.sec() && a.usec() < b.usec());
}
+// Equality for utime_t: true only when both the sec() and usec() components match.
+inline bool operator==(const utime_t& a, const utime_t& b)
+{
+ return a.sec() == b.sec() && a.usec() == b.usec();
+}
// ostream
inline std::ostream& operator<<(std::ostream& out, const utime_t& t)
out << " s=" << dir.fnode.fragstat.size()
<< "=" << dir.fnode.fragstat.nfiles
<< "+" << dir.fnode.fragstat.nsubdirs;
- out << " rb=" << dir.fnode.fragstat.rbytes << "/" << dir.fnode.accounted_fragstat.rbytes;
- out << " rf=" << dir.fnode.fragstat.rfiles << "/" << dir.fnode.accounted_fragstat.rfiles;
- out << " rd=" << dir.fnode.fragstat.rsubdirs << "/" << dir.fnode.accounted_fragstat.rsubdirs;
+ out << " rb=" << dir.fnode.rstat.rbytes << "/" << dir.fnode.accounted_rstat.rbytes;
+ out << " rf=" << dir.fnode.rstat.rfiles << "/" << dir.fnode.accounted_rstat.rfiles;
+ out << " rd=" << dir.fnode.rstat.rsubdirs << "/" << dir.fnode.accounted_rstat.rsubdirs;
out << " hs=" << dir.get_num_head_items() << "+" << dir.get_num_head_null();
out << ",ss=" << dir.get_num_snap_items() << "+" << dir.get_num_snap_null();
fnode.fragstat.nsubdirs++;
else
fnode.fragstat.nfiles++;
- fnode.fragstat.rbytes += pi->accounted_dirstat.rbytes;
- fnode.fragstat.rfiles += pi->accounted_dirstat.rfiles;
- fnode.fragstat.rsubdirs += pi->accounted_dirstat.rsubdirs;
- if (pi->accounted_dirstat.rctime > fnode.fragstat.rctime)
- fnode.fragstat.rctime = pi->accounted_dirstat.rctime;
+ // fold pi's accounted_rstat counters into fnode.rstat, field by field;
+ // rctime keeps the later of the two values.
+ fnode.rstat.rbytes += pi->accounted_rstat.rbytes;
+ fnode.rstat.rfiles += pi->accounted_rstat.rfiles;
+ fnode.rstat.rsubdirs += pi->accounted_rstat.rsubdirs;
+ fnode.rstat.ranchors += pi->accounted_rstat.ranchors;
+ fnode.rstat.rsnaprealms += pi->accounted_rstat.rsnaprealms; // each counter sums its own field
+ if (pi->accounted_rstat.rctime > fnode.rstat.rctime)
+ fnode.rstat.rctime = pi->accounted_rstat.rctime;
} else if (dn->is_remote()) {
if (dn->get_remote_d_type() == (S_IFDIR >> 12))
fnode.fragstat.nsubdirs++;
double fac = 1.0 / (double)(1 << bits); // for scaling load vecs
- frag_info_t olddiff; // old += f - af;
- bool changed_mtime;
- dout(10) << " fragstat " << fnode.fragstat << dendl;
- dout(10) << " accounted_fragstat " << fnode.accounted_fragstat << dendl;
+ nest_info_t olddiff; // old += f - af;
+ dout(10) << " rstat " << fnode.rstat << dendl;
+ dout(10) << " accounted_rstat " << fnode.accounted_rstat << dendl;
olddiff.zero();
- olddiff.take_diff(fnode.fragstat, fnode.accounted_fragstat, changed_mtime);
- dout(10) << " olddiff " << olddiff << dendl;
+ olddiff.take_diff(fnode.rstat, fnode.accounted_rstat);
+ dout(10) << " olddiff " << olddiff << dendl;
// create subfrag dirs
int n = 0;
// give any outstanding frag stat differential to first frag
// af[0] -= olddiff
dout(10) << "giving olddiff " << olddiff << " to " << *subfrags[0] << dendl;
- frag_info_t zero;
+ nest_info_t zero;
zero.zero();
- subfrags[0]->fnode.accounted_fragstat.take_diff(zero, olddiff, changed_mtime);
+ subfrags[0]->fnode.accounted_rstat.take_diff(zero, olddiff);
dout(10) << " " << subfrags[0]->fnode.accounted_fragstat << dendl;
purge_stolen(waiters, replay);
}
fnode_t fnode;
+ snapid_t first;
+ map<snapid_t,fnode_t> dirty_old_fnodes;
protected:
version_t projected_version;
out << " nl=" << in.inode.nlink;
}
- out << " rb=" << in.inode.dirstat.rbytes;
- if (in.is_projected()) out << "/" << in.inode.accounted_dirstat.rbytes;
- out << " rf=" << in.inode.dirstat.rfiles;
- if (in.is_projected()) out << "/" << in.inode.accounted_dirstat.rfiles;
- out << " rd=" << in.inode.dirstat.rsubdirs;
- if (in.is_projected()) out << "/" << in.inode.accounted_dirstat.rsubdirs;
+ out << " rb=" << in.inode.rstat.rbytes;
+ if (in.is_projected()) out << "/" << in.inode.accounted_rstat.rbytes;
+ out << " rf=" << in.inode.rstat.rfiles;
+ if (in.is_projected()) out << "/" << in.inode.accounted_rstat.rfiles;
+ out << " rd=" << in.inode.rstat.rsubdirs;
+ if (in.is_projected()) out << "/" << in.inode.accounted_rstat.rsubdirs;
// locks
out << " " << in.authlock;
out << " " << in.dirfragtreelock;
out << " " << in.dirlock;
out << " " << in.snaplock;
+ out << " " << in.nestlock;
} else
out << " " << in.filelock;
out << " " << in.xattrlock;
}
break;
+ case CEPH_LOCK_INEST:
+ {
+ // Encoded layout: inode.rstat, then a __u32 count n, then n records of
+ // (frag, fnode.rstat, fnode.accounted_rstat).
+ dout(15) << "encode_lock_state inode.rstat is " << inode.rstat << dendl;
+ ::encode(inode.rstat, bl); // only meaningful if i am auth.
+ // per-frag records are staged in tmp because the count n must be
+ // written to bl before them and is only known after the loop.
+ bufferlist tmp;
+ __u32 n = 0;
+ for (map<frag_t,CDir*>::iterator p = dirfrags.begin();
+ p != dirfrags.end();
+ ++p)
+ // include a frag when this inode is auth or the dirfrag itself is auth
+ if (is_auth() || p->second->is_auth()) {
+ dout(15) << "encode_lock_state rstat for " << *p->second << dendl;
+ dout(20) << " rstat " << p->second->fnode.rstat << dendl;
+ dout(20) << " accounted_rstat " << p->second->fnode.accounted_rstat << dendl;
+ frag_t fg = p->second->dirfrag().frag;
+ ::encode(fg, tmp);
+ ::encode(p->second->fnode.rstat, tmp);
+ ::encode(p->second->fnode.accounted_rstat, tmp);
+ n++;
+ }
+ ::encode(n, bl);
+ bl.claim_append(tmp);
+ }
+ break;
+
case CEPH_LOCK_IXATTR:
::encode(xattrs, bl);
break;
case CEPH_LOCK_ISNAP:
encode_snap(bl);
break;
+
default:
assert(0);
}
break;
+ case CEPH_LOCK_INEST:
+ {
+ // Decoded layout: inode rstat, then a __u32 count n, then n records of
+ // (frag, rstat, accounted_rstat).
+ nest_info_t rstat;
+ ::decode(rstat, p);
+ if (!is_auth()) {
+ dout(10) << " taking inode rstat " << rstat << " for " << *this << dendl;
+ inode.rstat = rstat; // take inode summation if replica
+ }
+ __u32 n;
+ ::decode(n, p);
+ dout(10) << " ...got " << n << " rstats on " << *this << dendl;
+ while (n--) {
+ // per-frag record; these locals intentionally shadow the inode-level rstat above
+ frag_t fg;
+ nest_info_t rstat;
+ nest_info_t accounted_rstat;
+ ::decode(fg, p);
+ ::decode(rstat, p);
+ ::decode(accounted_rstat, p);
+ dout(10) << fg << " got changed rstat " << rstat << dendl;
+ dout(20) << fg << " accounted_rstat " << accounted_rstat << dendl;
+
+ CDir *dir = get_dirfrag(fg);
+ if (is_auth()) {
+ assert(dir); // i am auth; i had better have this dir open
+ dout(10) << " " << fg << " rstat " << rstat << " on " << *dir << dendl;
+ dout(20) << " " << fg << " accounted_rstat " << accounted_rstat << dendl;
+ dir->fnode.rstat = rstat;
+ dir->fnode.accounted_rstat = accounted_rstat;
+ // unaccounted rstat change: flag the lock that carries rstat (nestlock),
+ // not dirlock.
+ if (!(rstat == accounted_rstat))
+ nestlock.set_updated();
+ } else {
+ if (dir &&
+ dir->is_auth() &&
+ !(dir->fnode.accounted_rstat == rstat)) {
+ dout(10) << " setting accounted_rstat " << rstat << " and setting dirty bit on "
+ << *dir << dendl;
+ fnode_t *pf = dir->get_projected_fnode();
+ pf->accounted_rstat = rstat;
+ // dir->is_auth() is already established by the enclosing condition
+ dir->_set_dirty_flag(); // bit of a hack
+ }
+ }
+ }
+ }
+ break;
+
case CEPH_LOCK_IXATTR:
::decode(xattrs, p);
break;
xlist_dirty_dirfrag_dir.remove_myself();
break;
+ case CEPH_LOCK_INEST:
+ xlist_dirty_dirfrag_nest.remove_myself();
+ break;
+
case CEPH_LOCK_IDFT:
xlist_dirty_dirfrag_dirfragtree.remove_myself();
break;
}
break;
+ case CEPH_LOCK_INEST:
+ {
+ // adjust summation: fold each dirfrag's (rstat - accounted_rstat) into the
+ // projected inode's rstat, then bump the rstat version.
+ assert(is_auth());
+ inode_t *pi = get_projected_inode();
+ dout(20) << " orig rstat " << pi->rstat << dendl;
+ for (map<frag_t,CDir*>::iterator p = dirfrags.begin();
+ p != dirfrags.end();
+ ++p) {
+ fnode_t *pf = p->second->get_projected_fnode();
+ // only frags whose accounted_rstat version matches the inode's are folded in
+ if (pf->accounted_rstat.version == pi->rstat.version) {
+ dout(20) << " frag " << p->first << " " << *p->second << dendl;
+ dout(20) << " rstat " << pf->rstat << dendl;
+ dout(20) << " accounted_rstat " << pf->accounted_rstat << dendl;
+ pi->rstat.take_diff(pf->rstat,
+ pf->accounted_rstat);
+ } else {
+ dout(20) << " frag " << p->first << " on " << *p->second << dendl;
+ dout(20) << " ignoring OLD accounted_rstat " << pf->accounted_rstat << dendl;
+ }
+ }
+ pi->rstat.version++;
+ dout(20) << " final rstat " << pi->rstat << dendl;
+ // sanity: the summed counters must never go negative
+ assert(pi->rstat.rfiles >= 0);
+ assert(pi->rstat.rsubdirs >= 0);
+ }
+ break;
+
case CEPH_LOCK_IDFT:
break;
e.files = i->dirstat.nfiles;
e.subdirs = i->dirstat.nsubdirs;
- i->dirstat.rctime.encode_timeval(&e.rctime);
- e.rbytes = i->dirstat.rbytes;
- e.rfiles = i->dirstat.rfiles;
- e.rsubdirs = i->dirstat.rsubdirs;
+ i->rstat.rctime.encode_timeval(&e.rctime);
+ e.rbytes = i->rstat.rbytes;
+ e.rfiles = i->rstat.rfiles;
+ e.rsubdirs = i->rstat.rsubdirs;
e.rdev = i->rdev;
e.fragtree.nsplits = dirfragtree._splits.size();
::encode(dirlock, bl);
::encode(xattrlock, bl);
::encode(snaplock, bl);
+ ::encode(nestlock, bl);
get(PIN_TEMPEXPORTING);
}
::decode(dirlock, p);
::decode(xattrlock, p);
::decode(snaplock, p);
+ ::decode(nestlock, p);
}
static const int WAIT_VERSIONLOCK_OFFSET = 4 + 4*SimpleLock::WAIT_BITS;
static const int WAIT_XATTRLOCK_OFFSET = 4 + 5*SimpleLock::WAIT_BITS;
static const int WAIT_SNAPLOCK_OFFSET = 4 + 6*SimpleLock::WAIT_BITS;
+ static const int WAIT_NESTLOCK_OFFSET = 4 + 7*SimpleLock::WAIT_BITS;
static const int WAIT_ANY_MASK = (0xffffffff);
SnapRealm *snaprealm;
SnapRealm *containing_realm;
- snapid_t first, last; // last=0 => multiversion or head.
+ snapid_t first, last;
map<snapid_t, old_inode_t> old_inodes; // key = last, value.first = first
+ set<snapid_t> dirty_old_dirstats;
bool is_multiversion() { return snaprealm || inode.is_dir(); }
snapid_t get_oldest_snap();
xlist<CInode*>::item xlist_caps;
xlist<CInode*>::item xlist_open_file;
xlist<CInode*>::item xlist_dirty_dirfrag_dir;
+ xlist<CInode*>::item xlist_dirty_dirfrag_nest;
xlist<CInode*>::item xlist_dirty_dirfrag_dirfragtree;
xlist<CInode*>::item xlist_purging_inode;
replica_caps_wanted(0),
xlist_dirty(this), xlist_caps(this), xlist_open_file(this),
xlist_dirty_dirfrag_dir(this),
+ xlist_dirty_dirfrag_nest(this),
xlist_dirty_dirfrag_dirfragtree(this),
xlist_purging_inode(this),
auth_pins(0), nested_auth_pins(0),
filelock(this, CEPH_LOCK_IFILE, WAIT_FILELOCK_OFFSET),
dirlock(this, CEPH_LOCK_IDIR, WAIT_DIRLOCK_OFFSET),
xattrlock(this, CEPH_LOCK_IXATTR, WAIT_XATTRLOCK_OFFSET),
- snaplock(this, CEPH_LOCK_ISNAP, WAIT_SNAPLOCK_OFFSET)
+ snaplock(this, CEPH_LOCK_ISNAP, WAIT_SNAPLOCK_OFFSET),
+ nestlock(this, CEPH_LOCK_INEST, WAIT_NESTLOCK_OFFSET)
{
memset(&inode, 0, sizeof(inode));
state = 0;
ScatterLock dirlock;
SimpleLock xattrlock;
SimpleLock snaplock;
+ ScatterLock nestlock;
SimpleLock* get_lock(int type) {
switch (type) {
case CEPH_LOCK_IDIR: return &dirlock;
case CEPH_LOCK_IXATTR: return &xattrlock;
case CEPH_LOCK_ISNAP: return &snaplock;
+ case CEPH_LOCK_INEST: return &nestlock;
}
return 0;
}
dirlock.replicate_relax();
xattrlock.replicate_relax();
snaplock.replicate_relax();
+ nestlock.replicate_relax();
}
__u32 dirlock_state;
__u32 xattrlock_state;
__u32 snaplock_state;
+ __u32 nestlock_state;
public:
CInodeDiscover() {}
dirlock_state = in->dirlock.get_replica_state();
xattrlock_state = in->xattrlock.get_replica_state();
snaplock_state = in->snaplock.get_replica_state();
+ nestlock_state = in->nestlock.get_replica_state();
}
CInodeDiscover(bufferlist::iterator &p) {
decode(p);
in->dirlock.set_state(dirlock_state);
in->xattrlock.set_state(xattrlock_state);
in->snaplock.set_state(snaplock_state);
+ in->nestlock.set_state(nestlock_state);
}
void encode(bufferlist &bl) const {
::encode(dirlock_state, bl);
::encode(xattrlock_state, bl);
::encode(snaplock_state, bl);
+ ::encode(nestlock_state, bl);
}
void decode(bufferlist::iterator &p) {
::decode(dirlock_state, p);
::decode(xattrlock_state, p);
::decode(snaplock_state, p);
+ ::decode(nestlock_state, p);
}
};
return file_eval_gather((FileLock*)lock);
case CEPH_LOCK_IDFT:
case CEPH_LOCK_IDIR:
+ case CEPH_LOCK_INEST:
return scatter_eval_gather((ScatterLock*)lock);
default:
return simple_eval_gather(lock);
return file_rdlock_start((FileLock*)lock, mut);
case CEPH_LOCK_IDFT:
case CEPH_LOCK_IDIR:
+ case CEPH_LOCK_INEST:
return scatter_rdlock_start((ScatterLock*)lock, mut);
default:
return simple_rdlock_start(lock, mut);
return file_rdlock_finish((FileLock*)lock, mut);
case CEPH_LOCK_IDFT:
case CEPH_LOCK_IDIR:
+ case CEPH_LOCK_INEST:
return scatter_rdlock_finish((ScatterLock*)lock, mut);
default:
return simple_rdlock_finish(lock, mut);
switch (lock->get_type()) {
case CEPH_LOCK_IDFT:
case CEPH_LOCK_IDIR:
+ case CEPH_LOCK_INEST:
return scatter_wrlock_start((ScatterLock*)lock, mut);
case CEPH_LOCK_IVERSION:
return local_wrlock_start((LocalLock*)lock, mut);
switch (lock->get_type()) {
case CEPH_LOCK_IDFT:
case CEPH_LOCK_IDIR:
+ case CEPH_LOCK_INEST:
return scatter_wrlock_finish((ScatterLock*)lock, mut);
case CEPH_LOCK_IVERSION:
return local_wrlock_finish((LocalLock*)lock, mut);
return local_xlock_start((LocalLock*)lock, mut);
case CEPH_LOCK_IDFT:
case CEPH_LOCK_IDIR:
+ case CEPH_LOCK_INEST:
return scatter_xlock_start((ScatterLock*)lock, mut);
default:
return simple_xlock_start(lock, mut);
return local_xlock_finish((LocalLock*)lock, mut);
case CEPH_LOCK_IDFT:
case CEPH_LOCK_IDIR:
+ case CEPH_LOCK_INEST:
return scatter_xlock_finish((ScatterLock*)lock, mut);
default:
return simple_xlock_finish(lock, mut);
dout(10) << "check_inode_max_size also forcing size "
<< pi->size << " -> " << new_size << dendl;
pi->size = new_size;
- pi->dirstat.rbytes = new_size;
+ pi->rstat.rbytes = new_size;
}
EOpen *le = new EOpen(mds->mdlog);
dout(7) << " size " << pi->size << " -> " << size
<< " for " << *in << dendl;
pi->size = size;
- pi->dirstat.rbytes = size;
+ pi->rstat.rbytes = size;
}
if (dirty_atime) {
dout(7) << " atime " << pi->atime << " -> " << atime
mut->ls = mds->mdlog->get_current_segment();
file_wrlock_force(&in->filelock, mut); // wrlock for duration of journal
mut->auth_pin(in);
- mdcache->predirty_journal_parents(mut, &le->metablob, in, 0, PREDIRTY_PRIMARY, false);
-
+ mdcache->predirty_journal_parents(mut, &le->metablob, in, 0, PREDIRTY_PRIMARY, 0, follows);
mdcache->journal_dirty_inode(&le->metablob, in, follows);
mds->mdlog->submit_entry(le, new C_Locker_FileUpdate_finish(this, in, mut, change_max));
case CEPH_LOCK_IDFT:
case CEPH_LOCK_IFILE:
case CEPH_LOCK_IDIR:
+ case CEPH_LOCK_INEST:
case CEPH_LOCK_IXATTR:
case CEPH_LOCK_ISNAP:
{
case CEPH_LOCK_IDFT: return &in->dirfragtreelock;
case CEPH_LOCK_IFILE: return &in->filelock;
case CEPH_LOCK_IDIR: return &in->dirlock;
+ case CEPH_LOCK_INEST: return &in->nestlock;
case CEPH_LOCK_IXATTR: return &in->xattrlock;
case CEPH_LOCK_ISNAP: return &in->snaplock;
}
case CEPH_LOCK_IDFT:
case CEPH_LOCK_IDIR:
+ case CEPH_LOCK_INEST:
handle_scatter_lock((ScatterLock*)lock, m);
break;
xlist<CInode*> open_files;
xlist<CInode*> dirty_dirfrag_dir;
+ xlist<CInode*> dirty_dirfrag_nest;
xlist<CInode*> dirty_dirfrag_dirfragtree;
xlist<MDSlaveUpdate*> slave_updates;
if (dir->inode->is_auth() &&
dir->inode->dirlock.is_stable()) {
// force the issue a bit
- if (!dir->inode->is_frozen())
+ if (!dir->inode->is_frozen()) {
mds->locker->scatter_eval(&dir->inode->dirlock);
- else
+ mds->locker->scatter_eval(&dir->inode->nestlock);
+ } else {
mds->locker->try_scatter_eval(&dir->inode->dirlock); // ** may or may not be auth_pinned **
+ mds->locker->try_scatter_eval(&dir->inode->nestlock); // ** may or may not be auth_pinned **
+ }
}
}
// ===================================
-// journal helpers
+// journal and snap/cow helpers
/*
* find first inode in cache that follows given snapid. otherwise, return current.
{
dout(10) << "journal_cow_dentry follows " << follows << " on " << *dn << dendl;
- // nothing to cow on a null dentry
+ // nothing to cow on a null dentry, fix caller
assert(!dn->is_null());
- /*
- * normally, we write to the head, and make a clone of ther previous
- * dentry+inode state. unless the follow snapid specified.
- */
-
if (dn->is_primary() && dn->inode->is_multiversion()) {
// multiversion inode.
CInode *in = dn->inode;
old.first = in->first;
old.inode = *in->get_previous_projected_inode();
old.xattrs = in->xattrs;
+
+ //if (!(old.inode.dirstat == old.inode.accounted_dirstat))
+ //in->dirty_old_dirstats.insert(follows);
in->first = follows+1;
*/
void MDCache::predirty_journal_parents(Mutation *mut, EMetaBlob *blob,
CInode *in, CDir *parent,
- int flags, int linkunlink)
+ int flags, int linkunlink,
+ snapid_t cfollows)
{
bool primary_dn = flags & PREDIRTY_PRIMARY;
bool do_parent_mtime = flags & PREDIRTY_DIR;
<< " linkunlink=" << linkunlink
<< (primary_dn ? " primary_dn":" remote_dn")
<< (shallow ? " SHALLOW":"")
+ << " follows " << cfollows
<< " " << *in << dendl;
if (!parent) {
inode_t *curi = in->get_projected_inode();
- __s64 drbytes = 1, drfiles = 0, drsubdirs = 0, dranchors = 0, drsnaprealms = 0;
- utime_t rctime;
-
// build list of inodes to wrlock, dirty, and update
list<CInode*> lsi;
CInode *cur = in;
// opportunistically adjust parent dirfrag
CInode *pin = parent->get_inode();
- if (do_parent_mtime || linkunlink) {
- assert(mut->wrlocks.count(&pin->dirlock) ||
- mut->is_slave()); // we are slave. master will have wrlocked the dir.
- }
-
// inode -> dirfrag
mut->auth_pin(parent);
mut->add_projected_fnode(parent);
fnode_t *pf = parent->project_fnode();
pf->version = parent->pre_dirty();
- if (do_parent_mtime) {
- pf->fragstat.mtime = mut->now;
- if (mut->now > pf->fragstat.rctime) {
- dout(10) << "predirty_journal_parents updating mtime on " << *parent << dendl;
- pf->fragstat.rctime = mut->now;
- } else {
- dout(10) << "predirty_journal_parents updating mtime UNDERWATER on " << *parent << dendl;
+ if (do_parent_mtime || linkunlink) {
+ assert(mut->wrlocks.count(&pin->dirlock) ||
+ mut->is_slave()); // we are slave. master will have wrlocked the dir.
+
+ if (do_parent_mtime) {
+ pf->fragstat.mtime = mut->now;
+ if (mut->now > pf->rstat.rctime) {
+ dout(10) << "predirty_journal_parents updating mtime on " << *parent << dendl;
+ pf->rstat.rctime = mut->now;
+ } else {
+ dout(10) << "predirty_journal_parents updating mtime UNDERWATER on " << *parent << dendl;
+ }
+ }
+ if (linkunlink) {
+ dout(10) << "predirty_journal_parents updating size on " << *parent << dendl;
+ if (in->is_dir()) {
+ pf->fragstat.nsubdirs += linkunlink;
+ pf->rstat.rsubdirs += linkunlink;
+ } else {
+ pf->fragstat.nfiles += linkunlink;
+ pf->rstat.rfiles += linkunlink;
+ }
}
}
- if (linkunlink) {
- dout(10) << "predirty_journal_parents updating size on " << *parent << dendl;
- if (in->is_dir())
- pf->fragstat.nsubdirs += linkunlink;
- else
- pf->fragstat.nfiles += linkunlink;
+
+
+ /*
+ if (follows == CEPH_NOSNAP || follows == 0)
+ follows = parent->inode->find_snaprealm()->get_latest_snap();
+
+ // cow fnode?
+ snapid_t follows = cfollows;
+ if (follows >= first &&
+ !(pf->fragstat == pf->accounted_fragstat)) {
+ dout(10) << " cow fnode, follows " << follows << dendl;
+ dirty_old_fnodes[follows] = parent->get_projected_fnode();
}
- if (primary_dn) {
+ first = follows+1;
+ */
+ // which fnode to write to?
+ //fnode_t *pf = 0;
+ /* fixme
+ if (dirty_old_fnodes.size() &&
+ dirty_old_fnodes.rbegin()->first > follows) {
+ map<snapid_t,fnode_t>::iterator p = dirty_old_fnodes.upper_bound(follows);
+ dout(10) << " cloning dirty_old_fnode " << p->first << " to follows " << follows << dendl;
+ dirty_old_fnodes[follows] = p->second;
+ pf = &p->fragstat;
+ }
+ }
+ */
+ //if (!pf) {
+
+ if (primary_dn) {
+ nest_info_t delta;
+ delta.zero();
if (linkunlink == 0) {
- drbytes = curi->dirstat.rbytes - curi->accounted_dirstat.rbytes;
- drfiles = curi->dirstat.rfiles - curi->accounted_dirstat.rfiles;
- drsubdirs = curi->dirstat.rsubdirs - curi->accounted_dirstat.rsubdirs;
- dranchors = curi->dirstat.ranchors - curi->accounted_dirstat.ranchors;
- drsnaprealms = curi->dirstat.rsnaprealms - curi->accounted_dirstat.rsnaprealms;
+ delta.add(curi->rstat);
+ delta.sub(curi->accounted_rstat);
} else if (linkunlink < 0) {
- drbytes = 0 - curi->accounted_dirstat.rbytes;
- drfiles = 0 - curi->accounted_dirstat.rfiles;
- drsubdirs = 0 - curi->accounted_dirstat.rsubdirs;
- dranchors = 0 - curi->accounted_dirstat.ranchors;
- drsnaprealms = 0 - curi->accounted_dirstat.rsnaprealms;
+ delta.sub(curi->accounted_rstat);
} else {
- drbytes = curi->dirstat.rbytes;
- drfiles = curi->dirstat.rfiles;
- drsubdirs = curi->dirstat.rsubdirs;
- dranchors = curi->dirstat.ranchors;
- drsnaprealms = curi->dirstat.rsnaprealms;
+ delta.add(curi->rstat);
}
- rctime = MAX(curi->ctime, curi->dirstat.rctime);
-
- dout(10) << "predirty_journal_parents delta "
- << drbytes << " bytes / " << drfiles << " files / " << drsubdirs << " subdirs for "
- << *parent << dendl;
- pf->fragstat.rbytes += drbytes;
- pf->fragstat.rfiles += drfiles;
- pf->fragstat.rsubdirs += drsubdirs;
- pf->fragstat.ranchors += dranchors;
- pf->fragstat.rsnaprealms += drsnaprealms;
- pf->fragstat.rctime = rctime;
-
- curi->accounted_dirstat = curi->dirstat;
- } else {
- dout(10) << "predirty_journal_parents no delta (remote dentry, or rename within same dir) in " << *parent << dendl;
- pf->fragstat.rfiles += linkunlink;
- }
+ dout(10) << "predirty_journal_parents delta " << delta << " " << *parent << dendl;
+ pf->rstat.add(delta);
+ curi->accounted_rstat = curi->rstat;
+ }
// stop?
if (pin->is_base())
stop = true;
}
if (!stop &&
- mut->wrlocks.count(&pin->dirlock) == 0 &&
+ mut->wrlocks.count(&pin->nestlock) == 0 &&
(!pin->can_auth_pin() ||
!pin->versionlock.can_wrlock() || // make sure we can take versionlock, too
- !mds->locker->scatter_wrlock_try(&pin->dirlock, mut, false))) { // ** do not initiate.. see above comment **
- dout(10) << "predirty_journal_parents can't wrlock one of " << pin->versionlock << " or " << pin->dirlock
+ !mds->locker->scatter_wrlock_try(&pin->nestlock, mut, false))) { // ** do not initiate.. see above comment **
+ dout(10) << "predirty_journal_parents can't wrlock one of " << pin->versionlock << " or " << pin->nestlock
<< " on " << *pin << dendl;
stop = true;
}
if (stop) {
- dout(10) << "predirty_journal_parents stop. marking dirlock on " << *pin << dendl;
- mds->locker->mark_updated_scatterlock(&pin->dirlock);
- mut->ls->dirty_dirfrag_dir.push_back(&pin->xlist_dirty_dirfrag_dir);
- mut->add_updated_scatterlock(&pin->dirlock);
+ dout(10) << "predirty_journal_parents stop. marking nestlock on " << *pin << dendl;
+ mds->locker->mark_updated_scatterlock(&pin->nestlock);
+ mut->ls->dirty_dirfrag_nest.push_back(&pin->xlist_dirty_dirfrag_nest);
+ mut->add_updated_scatterlock(&pin->nestlock);
break;
}
mds->locker->local_wrlock_grab(&pin->versionlock, mut);
+ assert(mut->wrlocks.count(&pin->nestlock) ||
+ mut->is_slave());
+
// dirfrag -> diri
mut->auth_pin(pin);
mut->add_projected_inode(pin);
inode_t *pi = pin->project_inode();
pi->version = pin->pre_dirty();
- pi->dirstat.version++;
- dout(15) << "predirty_journal_parents take_diff " << pf->fragstat << dendl;
- dout(15) << "predirty_journal_parents - " << pf->accounted_fragstat << dendl;
- bool touched_mtime = false;
- pi->dirstat.take_diff(pf->fragstat, pf->accounted_fragstat, touched_mtime);
- if (touched_mtime)
- pi->mtime = pi->ctime = pi->dirstat.mtime;
- dout(15) << "predirty_journal_parents gives " << pi->dirstat << " on " << *pin << dendl;
+
+ // dirstat
+ if (do_parent_mtime || linkunlink) {
+ pi->dirstat.version++;
+ dout(15) << "predirty_journal_parents take_diff " << pf->fragstat << dendl;
+ dout(15) << "predirty_journal_parents - " << pf->accounted_fragstat << dendl;
+ bool touched_mtime = false;
+ pi->dirstat.take_diff(pf->fragstat, pf->accounted_fragstat, touched_mtime);
+ if (touched_mtime)
+ pi->mtime = pi->ctime = pi->dirstat.mtime;
+ dout(15) << "predirty_journal_parents gives " << pi->dirstat << " on " << *pin << dendl;
+ }
+
+ // rstat
+ if (primary_dn) {
+ pi->rstat.version++;
+ dout(15) << "predirty_journal_parents take_diff " << pf->rstat << dendl;
+ dout(15) << "predirty_journal_parents - " << pf->accounted_rstat << dendl;
+ pi->rstat.take_diff(pf->rstat, pf->accounted_rstat);
+ dout(15) << "predirty_journal_parents gives " << pi->rstat << " on " << *pin << dendl;
+ }
// next parent!
cur = pin;
root->linklock.get_state(),
root->dirfragtreelock.get_state(),
root->filelock.get_state(),
- root->dirlock.get_state());
+ root->dirlock.get_state(),
+ root->nestlock.get_state(),
+ root->snaplock.get_state(),
+ root->xattrlock.get_state());
}
if (CInode *in = get_inode(MDS_INO_STRAY(p->first))) {
p->second->add_weak_inode(in->ino());
in->linklock.get_state(),
in->dirfragtreelock.get_state(),
in->filelock.get_state(),
- in->dirlock.get_state());
+ in->dirlock.get_state(),
+ in->nestlock.get_state(),
+ in->snaplock.get_state(),
+ in->xattrlock.get_state());
}
}
}
in->linklock.get_state(),
in->dirfragtreelock.get_state(),
in->filelock.get_state(),
- in->dirlock.get_state());
+ in->dirlock.get_state(),
+ in->nestlock.get_state(),
+ in->snaplock.get_state(),
+ in->xattrlock.get_state());
in->get_nested_dirfrags(nested);
}
}
in->linklock.get_replica_state(),
in->dirfragtreelock.get_replica_state(),
in->filelock.get_replica_state(),
- in->dirlock.get_replica_state());
+ in->dirlock.get_replica_state(),
+ in->nestlock.get_replica_state(),
+ in->snaplock.get_replica_state(),
+ in->xattrlock.get_replica_state());
}
}
}
in->linklock.get_replica_state(),
in->dirfragtreelock.get_replica_state(),
in->filelock.get_replica_state(),
- in->dirlock.get_replica_state());
+ in->dirlock.get_replica_state(),
+ in->nestlock.get_replica_state(),
+ in->snaplock.get_replica_state(),
+ in->xattrlock.get_replica_state());
}
if (survivor) {
in->linklock.get_replica_state(),
in->dirfragtreelock.get_replica_state(),
in->filelock.get_replica_state(),
- in->dirlock.get_replica_state());
+ in->dirlock.get_replica_state(),
+ in->nestlock.get_replica_state(),
+ in->snaplock.get_replica_state(),
+ in->xattrlock.get_replica_state());
}
// subdirs in this subtree?
root->linklock.get_replica_state(),
root->dirfragtreelock.get_replica_state(),
root->filelock.get_replica_state(),
- root->dirlock.get_replica_state());
+ root->dirlock.get_replica_state(),
+ root->nestlock.get_replica_state(),
+ root->snaplock.get_replica_state(),
+ root->xattrlock.get_replica_state());
}
if (stray)
for (map<int,int>::iterator r = stray->replicas_begin();
stray->linklock.get_replica_state(),
stray->dirfragtreelock.get_replica_state(),
stray->filelock.get_replica_state(),
- stray->dirlock.get_replica_state());
+ stray->dirlock.get_replica_state(),
+ stray->nestlock.get_replica_state(),
+ stray->snaplock.get_replica_state(),
+ stray->xattrlock.get_replica_state());
}
// send acks
inode_t *pi = in->project_inode();
if (add) {
pi->anchored = true;
- pi->dirstat.ranchors++;
+ pi->rstat.ranchors++;
in->parent->adjust_nested_anchors(1);
} else {
pi->anchored = false;
- pi->dirstat.ranchors--;
+ pi->rstat.ranchors--;
in->parent->adjust_nested_anchors(-1);
}
pi->version = in->pre_dirty();
inode_t *pi = in->project_inode();
pi->version = in->pre_dirty();
- pi->dirstat.rsnaprealms++;
+ pi->rstat.rsnaprealms++;
SnapRealm t(this, in);
t.created = mdr->more()->stid;
mdr->ls->dirty_dirfrag_dir.push_back(&diri->xlist_dirty_dirfrag_dir);
mdr->add_updated_scatterlock(&diri->dirlock);
+ // nestlock
+ mds->locker->mark_updated_scatterlock(&diri->nestlock);
+ mdr->ls->dirty_dirfrag_nest.push_back(&diri->xlist_dirty_dirfrag_nest);
+ mdr->add_updated_scatterlock(&diri->nestlock);
+
// journal new dirfrag fragstats for each new fragment.
for (list<CDir*>::iterator p = resultfrags.begin();
p != resultfrags.end();
// flags for predirty_journal_parents()
static const int PREDIRTY_PRIMARY = 1; // primary dn, adjust nested accounting
static const int PREDIRTY_DIR = 2; // update parent dir mtime/size
-static const int PREDIRTY_SHALLOW = 4; // only go to immediate parrent (for easier rollback)
+static const int PREDIRTY_SHALLOW = 4; // only go to immediate parent (for easier rollback)
class MDCache {
inode_t *journal_dirty_inode(EMetaBlob *metablob, CInode *in, snapid_t follows=CEPH_NOSNAP);
void predirty_journal_parents(Mutation *mut, EMetaBlob *blob,
CInode *in, CDir *parent,
- int flags, int linkunlink=0);
+ int flags, int linkunlink=0,
+ snapid_t follows=CEPH_NOSNAP);
// slaves
void add_uncommitted_master(metareqid_t reqid, LogSegment *ls, set<int> &slaves) {
else
rdlocks.insert(&dn->lock); // existing dn, rdlock
wrlocks.insert(&dn->dir->inode->dirlock); // also, wrlock on dir mtime
+ wrlocks.insert(&dn->dir->inode->nestlock); // also, wrlock on dir nested stats (rstat)
if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
return 0;
if ((newi->inode.mode & S_IFMT) == 0)
newi->inode.mode |= S_IFREG;
newi->inode.version = dn->pre_dirty() - 1;
- newi->inode.dirstat.rfiles = 1;
+ newi->inode.rstat.rfiles = 1;
newi->projected_parent = dn;
dn->first = newi->first = follows+1;
newi->inode.mode |= S_IFDIR;
newi->inode.layout = g_default_mds_dir_layout;
newi->inode.version = dn->pre_dirty() - 1;
- newi->inode.dirstat.rsubdirs = 1;
+ newi->inode.rstat.rsubdirs = 1;
dn->first = newi->first = follows+1;
newi->symlink = req->get_path2();
newi->inode.size = newi->symlink.length();
newi->inode.version = dn->pre_dirty() - 1;
- newi->inode.dirstat.rfiles = 1;
+ newi->inode.rstat.rfiles = 1;
dn->first = newi->first = follows+1;
rdlocks.insert(&linktrace[i]->lock);
xlocks.insert(&dn->lock);
wrlocks.insert(&dn->dir->inode->dirlock);
+ wrlocks.insert(&dn->dir->inode->nestlock);
for (int i=0; i<(int)targettrace.size(); i++)
rdlocks.insert(&targettrace[i]->lock);
xlocks.insert(&targeti->linklock);
rollback.old_ctime = targeti->inode.ctime; // we hold versionlock; no concorrent projections
fnode_t *pf = targeti->get_parent_dn()->get_dir()->get_projected_fnode();
rollback.old_dir_mtime = pf->fragstat.mtime;
- rollback.old_dir_rctime = pf->fragstat.rctime;
+ rollback.old_dir_rctime = pf->rstat.rctime;
rollback.was_inc = inc;
::encode(rollback, le->rollback);
mdr->more()->rollback_bl = le->rollback;
pf->version = parent->pre_dirty();
if (pf->fragstat.mtime == pi->ctime) {
pf->fragstat.mtime = rollback.old_dir_mtime;
- if (pf->fragstat.rctime == pi->ctime)
- pf->fragstat.rctime = rollback.old_dir_rctime;
+ if (pf->rstat.rctime == pi->ctime)
+ pf->rstat.rctime = rollback.old_dir_rctime;
mut->add_updated_scatterlock(&parent->get_inode()->dirlock);
+ mut->add_updated_scatterlock(&parent->get_inode()->nestlock);
}
// inode
rdlocks.insert(&trace[i]->lock);
xlocks.insert(&dn->lock);
wrlocks.insert(&dn->dir->inode->dirlock);
+ wrlocks.insert(&dn->dir->inode->nestlock);
xlocks.insert(&in->linklock);
- if (straydn)
+ if (straydn) {
wrlocks.insert(&straydn->dir->inode->dirlock);
+ wrlocks.insert(&straydn->dir->inode->nestlock);
+ }
if (in->is_dir())
rdlocks.insert(&in->dirlock); // to verify it's empty
set<SimpleLock*> rdlocks, wrlocks, xlocks;
// straydn?
- if (straydn)
+ if (straydn) {
wrlocks.insert(&straydn->dir->inode->dirlock);
+ wrlocks.insert(&straydn->dir->inode->nestlock);
+ }
// rdlock sourcedir path, xlock src dentry
for (int i=0; i<(int)srctrace.size()-1; i++)
rdlocks.insert(&srctrace[i]->lock);
xlocks.insert(&srcdn->lock);
wrlocks.insert(&srcdn->dir->inode->dirlock);
+ wrlocks.insert(&srcdn->dir->inode->nestlock);
// rdlock destdir path, xlock dest dentry
for (int i=0; i<(int)desttrace.size(); i++)
rdlocks.insert(&desttrace[i]->lock);
xlocks.insert(&destdn->lock);
wrlocks.insert(&destdn->dir->inode->dirlock);
+ wrlocks.insert(&destdn->dir->inode->nestlock);
// xlock versionlock on srci if remote?
// this ensures it gets safely remotely auth_pinned, avoiding deadlock;
if (srcdn->is_primary() &&
(srcdn->inode->is_anchored() ||
- (srcdn->inode->is_dir() && (srcdn->inode->inode.dirstat.ranchors ||
+ (srcdn->inode->is_dir() && (srcdn->inode->inode.rstat.ranchors ||
srcdn->inode->nested_anchors ||
!mdcache->is_leaf_subtree(mdcache->get_subtree_root(srcdn->dir))))) &&
!mdr->more()->src_reanchor_atid) {
mdr->ls,
mdr->more()->cap_imports, updated_scatterlocks);
srcdn->inode->dirlock.clear_updated();
+ srcdn->inode->nestlock.clear_updated();
// hack: force back to !auth and clean, temporarily
srcdn->inode->state_clear(CInode::STATE_AUTH);
// sub off target
if (destdn->is_auth() && !destdn->is_null())
mdcache->predirty_journal_parents(mdr, metablob, destdn->inode, destdn->dir,
- (destdn->is_primary() ? PREDIRTY_PRIMARY:0)|predirty_dir, -1);
+ (destdn->is_primary() ? PREDIRTY_PRIMARY:0)|predirty_dir, -1);
// move srcdn
int predirty_primary = (srcdn->is_primary() && srcdn->dir != destdn->dir) ? PREDIRTY_PRIMARY:0;
int flags = predirty_dir | predirty_primary;
if (srcdn->is_auth())
- mdcache->predirty_journal_parents(mdr, metablob, srcdn->inode, srcdn->dir, flags, -1);
+ mdcache->predirty_journal_parents(mdr, metablob, srcdn->inode, srcdn->dir, PREDIRTY_SHALLOW|flags, -1);
if (destdn->is_auth())
mdcache->predirty_journal_parents(mdr, metablob, srcdn->inode, destdn->dir, flags, 1);
rollback.orig_src.dirfrag = srcdn->dir->dirfrag();
rollback.orig_src.dirfrag_old_mtime = srcdn->dir->get_projected_fnode()->fragstat.mtime;
- rollback.orig_src.dirfrag_old_rctime = srcdn->dir->get_projected_fnode()->fragstat.rctime;
+ rollback.orig_src.dirfrag_old_rctime = srcdn->dir->get_projected_fnode()->rstat.rctime;
rollback.orig_src.dname = srcdn->name;
if (srcdn->is_primary())
rollback.orig_src.ino = srcdn->inode->ino();
rollback.orig_dest.dirfrag = destdn->dir->dirfrag();
rollback.orig_dest.dirfrag_old_mtime = destdn->dir->get_projected_fnode()->fragstat.mtime;
- rollback.orig_dest.dirfrag_old_rctime = destdn->dir->get_projected_fnode()->fragstat.rctime;
+ rollback.orig_dest.dirfrag_old_rctime = destdn->dir->get_projected_fnode()->rstat.rctime;
rollback.orig_dest.dname = destdn->name;
if (destdn->is_primary())
rollback.orig_dest.ino = destdn->inode->ino();
if (straydn) {
rollback.stray.dirfrag = straydn->dir->dirfrag();
rollback.stray.dirfrag_old_mtime = straydn->dir->get_projected_fnode()->fragstat.mtime;
- rollback.stray.dirfrag_old_rctime = straydn->dir->get_projected_fnode()->fragstat.rctime;
+ rollback.stray.dirfrag_old_rctime = straydn->dir->get_projected_fnode()->rstat.rctime;
rollback.stray.dname = straydn->name;
}
::encode(rollback, mdr->more()->rollback_bl);
}
void _rollback_repair_dir(Mutation *mut, CDir *dir, rename_rollback::drec &r, utime_t ctime,
- bool isdir, int linkunlink, bool primary, frag_info_t &dirstat)
+ bool isdir, int linkunlink, bool primary, frag_info_t &dirstat, nest_info_t &rstat)
{
fnode_t *pf;
if (dir->is_auth()) {
if (isdir) {
pf->fragstat.nsubdirs += linkunlink;
- pf->fragstat.rsubdirs += linkunlink;
+ pf->rstat.rsubdirs += linkunlink;
} else {
pf->fragstat.nfiles += linkunlink;
- pf->fragstat.rfiles += linkunlink;
+ pf->rstat.rfiles += linkunlink;
}
if (primary) {
- pf->fragstat.rbytes += linkunlink * dirstat.rbytes;
- pf->fragstat.rfiles += linkunlink * dirstat.rfiles;
- pf->fragstat.rsubdirs += linkunlink * dirstat.rsubdirs;
- pf->fragstat.ranchors += linkunlink * dirstat.ranchors;
+ pf->rstat.rbytes += linkunlink * rstat.rbytes;
+ pf->rstat.rfiles += linkunlink * rstat.rfiles;
+ pf->rstat.rsubdirs += linkunlink * rstat.rsubdirs;
+ pf->rstat.ranchors += linkunlink * rstat.ranchors;
+ pf->rstat.rsnaprealms += linkunlink * rstat.rsnaprealms;
}
if (pf->fragstat.mtime == ctime) {
pf->fragstat.mtime = r.dirfrag_old_mtime;
- if (pf->fragstat.rctime == ctime)
- pf->fragstat.rctime = r.dirfrag_old_rctime;
+ if (pf->rstat.rctime == ctime)
+ pf->rstat.rctime = r.dirfrag_old_rctime;
mut->add_updated_scatterlock(&dir->get_inode()->dirlock);
+ mut->add_updated_scatterlock(&dir->get_inode()->nestlock);
}
}
pi->ctime = rollback.orig_src.old_ctime;
_rollback_repair_dir(mut, srcdir, rollback.orig_src, rollback.ctime,
- in->is_dir(), 1, srcdn->is_primary(), pi->dirstat);
+ in->is_dir(), 1, srcdn->is_primary(), pi->dirstat, pi->rstat);
// repair dest
CInode *target = 0;
}
if (target)
_rollback_repair_dir(mut, destdir, rollback.orig_dest, rollback.ctime,
- target->is_dir(), 0, destdn->is_primary(), ti->dirstat);
+ target->is_dir(), 0, destdn->is_primary(), ti->dirstat, ti->rstat);
else {
frag_info_t blah;
- _rollback_repair_dir(mut, destdir, rollback.orig_dest, rollback.ctime, 0, -1, 0, blah);
+ nest_info_t blah2;
+ _rollback_repair_dir(mut, destdir, rollback.orig_dest, rollback.ctime, 0, -1, 0, blah, blah2);
}
// repair stray
if (straydir)
_rollback_repair_dir(mut, straydir, rollback.stray, rollback.ctime,
- target->is_dir(), -1, true, ti->dirstat);
+ target->is_dir(), -1, true, ti->dirstat, ti->rstat);
dout(-10) << " srcdn back to " << *srcdn << dendl;
dout(-10) << " srci back to " << *srcdn->inode << dendl;
in->inode.mode |= S_IFREG;
in->inode.version = dn->pre_dirty() - 1;
in->inode.max_size = in->get_layout_size_increment();
- in->inode.dirstat.rfiles = 1;
+ in->inode.rstat.rfiles = 1;
in->projected_parent = dn;
dn->first = in->first = follows+1;
case CEPH_LOCK_ILINK: return "ilink";
case CEPH_LOCK_IDFT: return "idft";
case CEPH_LOCK_IDIR: return "idir";
+ case CEPH_LOCK_INEST: return "inest";
case CEPH_LOCK_IXATTR: return "ixattr";
case CEPH_LOCK_ISNAP: return "isnap";
case CEPH_LOCK_INO: return "ino";
if (!gather) gather = new C_Gather;
mds->locker->scatter_nudge(&in->dirfragtreelock, gather->new_sub());
}
+ for (xlist<CInode*>::iterator p = dirty_dirfrag_nest.begin(); !p.end(); ++p) {
+ CInode *in = *p;
+ dout(10) << "try_to_expire waiting for nest flush on " << *in << dendl;
+ if (!gather) gather = new C_Gather;
+ mds->locker->scatter_nudge(&in->nestlock, gather->new_sub());
+ }
// open files
if (!open_files.empty()) {
__s64 nsubdirs; // subdirs
__s64 size() const { return nfiles + nsubdirs; }
+ void zero() { // reset this object by overwriting all sizeof(*this) bytes with 0
+ memset(this, 0, sizeof(*this));
+ }
+
+ // *this += cur - acc; acc = cur
+ void take_diff(const frag_info_t &cur, frag_info_t &acc, bool& touched_mtime) {
+ if (!(cur.mtime == acc.mtime)) { // any mtime difference between cur and acc counts, not just a newer one
+ mtime = cur.mtime;
+ touched_mtime = true; // only ever set to true here; it is never cleared by this method
+ }
+ nfiles += cur.nfiles - acc.nfiles; // apply the change in cur since acc was last synced
+ nsubdirs += cur.nsubdirs - acc.nsubdirs;
+ acc = cur; // acc now mirrors cur ...
+ acc.version = version; // ... except for its version, which is overwritten with this object's version
+ }
+
+ void encode(bufferlist &bl) const { // append version, mtime, nfiles, nsubdirs to bl, in that order
+ ::encode(version, bl);
+ ::encode(mtime, bl);
+ ::encode(nfiles, bl);
+ ::encode(nsubdirs, bl);
+ }
+ void decode(bufferlist::iterator &bl) { // read version, mtime, nfiles, nsubdirs from bl, in that order
+ ::decode(version, bl);
+ ::decode(mtime, bl);
+ ::decode(nfiles, bl);
+ ::decode(nsubdirs, bl);
+ }
+};
+WRITE_CLASS_ENCODER(frag_info_t)
+
+inline bool operator==(const frag_info_t &l, const frag_info_t &r) { // equal iff the two objects are byte-for-byte identical
+ return memcmp(&l, &r, sizeof(l)) == 0; // NOTE(review): memcmp also compares any padding bytes — confirm the layout has none
+}
+
+inline ostream& operator<<(ostream &out, const frag_info_t &f) { // prints "f(v<version> m<mtime> <size()>=<nfiles>+<nsubdirs>)"
+ return out << "f(v" << f.version
+ << " m" << f.mtime
+ << " " << f.size() << "=" << f.nfiles << "+" << f.nsubdirs
+ << ")";
+}
+
+struct nest_info_t {
+ version_t version;
+
// this frag + children
utime_t rctime;
__s64 rbytes;
void zero() {
memset(this, 0, sizeof(*this));
}
- void take_diff(const frag_info_t &cur, frag_info_t &acc, bool& touched_mtime) {
- if (cur.mtime > mtime) {
- rctime = mtime = cur.mtime;
- touched_mtime = true;
- }
- nfiles += cur.nfiles - acc.nfiles;
- nsubdirs += cur.nsubdirs - acc.nsubdirs;
+ void sub(const nest_info_t &other) { // subtract other's counters by calling add() with a factor of -1
+ add(other, -1);
+ }
+ void add(const nest_info_t &other, int fac=1) { // add fac * each of other's counters to this object's counters
+ if (other.rctime > rctime) // rctime is only ever raised, whatever the sign of fac
+ rctime = other.rctime;
+ rbytes += fac*other.rbytes;
+ rfiles += fac*other.rfiles;
+ rsubdirs += fac*other.rsubdirs;
+ ranchors += fac*other.ranchors;
+ rsnaprealms += fac*other.rsnaprealms;
+ }
+
+ // *this += cur - acc; acc = cur
+ void take_diff(const nest_info_t &cur, nest_info_t &acc) {
if (cur.rctime > rctime)
rctime = cur.rctime;
rbytes += cur.rbytes - acc.rbytes;
void encode(bufferlist &bl) const {
::encode(version, bl);
- ::encode(mtime, bl);
- ::encode(nfiles, bl);
- ::encode(nsubdirs, bl);
::encode(rbytes, bl);
::encode(rfiles, bl);
::encode(rsubdirs, bl);
}
void decode(bufferlist::iterator &bl) {
::decode(version, bl);
- ::decode(mtime, bl);
- ::decode(nfiles, bl);
- ::decode(nsubdirs, bl);
::decode(rbytes, bl);
::decode(rfiles, bl);
::decode(rsubdirs, bl);
::decode(rctime, bl);
}
};
-WRITE_CLASS_ENCODER(frag_info_t)
+WRITE_CLASS_ENCODER(nest_info_t)
-inline bool operator==(const frag_info_t &l, const frag_info_t &r) {
+inline bool operator==(const nest_info_t &l, const nest_info_t &r) { // equal iff byte-for-byte identical (memcmp over the whole struct)
return memcmp(&l, &r, sizeof(l)) == 0;
}
-inline ostream& operator<<(ostream &out, const frag_info_t &f) {
- return out << "f(v" << f.version
- << " m" << f.mtime
- << " " << f.size() << "=" << f.nfiles << "+" << f.nsubdirs
- << " rc" << f.rctime
- << " b" << f.rbytes
- << " a" << f.ranchors
- << " sr" << f.rsnaprealms
- << " " << f.rsize() << "=" << f.rfiles << "+" << f.rsubdirs
+inline ostream& operator<<(ostream &out, const nest_info_t &n) { // prints "n(v<version> rc<rctime> b<rbytes> a<ranchors> sr<rsnaprealms> <rsize()>=<rfiles>+<rsubdirs>)"
+ return out << "n(v" << n.version
+ << " rc" << n.rctime
+ << " b" << n.rbytes
+ << " a" << n.ranchors
+ << " sr" << n.rsnaprealms
+ << " " << n.rsize() << "=" << n.rfiles << "+" << n.rsubdirs
<< ")";
}
uint64_t time_warp_seq; // count of (potential) mtime/atime timewarps (i.e., utimes())
// dirfrag, recursive accounting
- frag_info_t dirstat;
- frag_info_t accounted_dirstat; // what dirfrag has seen
+ frag_info_t dirstat;
+ nest_info_t rstat, accounted_rstat;
// special stuff
version_t version; // auth only
::encode(time_warp_seq, bl);
::encode(dirstat, bl);
- ::encode(accounted_dirstat, bl);
+ ::encode(rstat, bl);
+ ::encode(accounted_rstat, bl);
::encode(version, bl);
::encode(file_data_version, bl);
::decode(time_warp_seq, p);
::decode(dirstat, p);
- ::decode(accounted_dirstat, p);
+ ::decode(rstat, p);
+ ::decode(accounted_rstat, p);
::decode(version, p);
::decode(file_data_version, p);
struct fnode_t {
version_t version;
frag_info_t fragstat, accounted_fragstat;
+ nest_info_t rstat, accounted_rstat; // nest_info_t counterparts of the fragstat pair; presumably accounted_rstat tracks what has already been propagated — confirm
void encode(bufferlist &bl) const {
::encode(version, bl);
::encode(fragstat, bl);
::encode(accounted_fragstat, bl);
+ ::encode(rstat, bl); // the two nest_info_t fields are written after the existing fields ...
+ ::encode(accounted_rstat, bl);
}
void decode(bufferlist::iterator &bl) {
::decode(version, bl);
::decode(fragstat, bl);
::decode(accounted_fragstat, bl);
+ ::decode(rstat, bl); // ... and read back in the same position and order
+ ::decode(accounted_rstat, bl);
}
};
WRITE_CLASS_ENCODER(fnode_t)
version_t time_warp_seq;
frag_info_t dirstat;
+ nest_info_t rstat;
string symlink; // symlink content (if symlink)
fragtree_t dirfragtree;
memset(&dirstat, 0, sizeof(dirstat));
dirstat.nfiles = e.files;
dirstat.nsubdirs = e.subdirs;
- dirstat.rctime.decode_timeval(&e.rctime);
- dirstat.rbytes = e.rbytes;
- dirstat.rfiles = e.rfiles;
- dirstat.rsubdirs = e.rsubdirs;
+
+ rstat.rctime.decode_timeval(&e.rctime);
+ rstat.rbytes = e.rbytes;
+ rstat.rfiles = e.rfiles;
+ rstat.rsubdirs = e.rsubdirs;
int n = e.fragtree.nsplits;
while (n) {
int32_t linklock;
int32_t dirfragtreelock;
int32_t filelock;
- int32_t dirlock;
+ int32_t dirlock, nestlock, snaplock, xattrlock;
inode_strong() {}
- inode_strong(int n, int cw=0, int a=0, int l=0, int dft=0, int f=0, int dl=0) :
+ inode_strong(int n, int cw=0, int a=0, int l=0, int dft=0, int f=0, int dl=0, int nl=0, int snl=0, int xal=0) : // nl, snl, xal initialize nestlock, snaplock, xattrlock; all three default to 0
caps_wanted(cw),
nonce(n),
- authlock(a), linklock(l), dirfragtreelock(dft), filelock(f), dirlock(dl) { }
+ authlock(a), linklock(l), dirfragtreelock(dft), filelock(f), dirlock(dl), nestlock(nl), snaplock(snl), xattrlock(xal) { }
void encode(bufferlist &bl) const {
::encode(caps_wanted, bl);
::encode(nonce, bl);
::encode(dirfragtreelock, bl);
::encode(filelock, bl);
::encode(dirlock, bl);
+ ::encode(nestlock, bl);
+ ::encode(snaplock, bl);
+ ::encode(xattrlock, bl);
}
void decode(bufferlist::iterator &bl) {
::decode(caps_wanted, bl);
::decode(dirfragtreelock, bl);
::decode(filelock, bl);
::decode(dirlock, bl);
+ ::decode(nestlock, bl);
+ ::decode(snaplock, bl);
+ ::decode(xattrlock, bl);
}
};
WRITE_CLASS_ENCODER(inode_strong)
void add_weak_inode(inodeno_t i) {
weak_inodes.insert(i);
}
- void add_strong_inode(inodeno_t i, int n, int cw, int a, int l, int dft, int f, int dl) {
- strong_inodes[i] = inode_strong(n, cw, a, l, dft, f, dl);
+ void add_strong_inode(inodeno_t i, int n, int cw, int a, int l, int dft, int f, int dl, int nl, int snl, int xl) { // stores an inode_strong under key i in strong_inodes; unlike the constructor, no argument has a default
+ strong_inodes[i] = inode_strong(n, cw, a, l, dft, f, dl, nl, snl, xl); // arguments are forwarded positionally, so nl, snl, xl follow dl in the constructor's parameter order
}
void add_full_inode(inode_t &i, const string& s, const fragtree_t &f) {
full_inodes.push_back(inode_full(i, s, f));