st->st_ctime = MAX(in->inode.ctime, in->inode.mtime);
st->st_atime = in->inode.atime;
st->st_mtime = in->inode.mtime;
- st->st_size = in->inode.size;
+ if (in->inode.is_dir())
+ st->st_size = in->inode.nested.rbytes;
+ else
+ st->st_size = in->inode.size;
st->st_blksize = MAX(ceph_file_layout_su(in->inode.layout), 4096);
st->st_blocks = in->inode.size ? DIV_ROUND_UP(in->inode.size, st->st_blksize):0;
return in->lease_mask;
if (in.is_freezing_inode()) out << " FREEZING=" << in.auth_pin_freeze_allowance;
if (in.is_frozen_inode()) out << " FROZEN";
+ out << " s=" << in.inode.size;
+ out << " rb=" << in.inode.nested.rbytes << "/" << in.inode.accounted_nested.rbytes;
+
// locks
out << " " << in.authlock;
out << " " << in.linklock;
version_t CInode::pre_dirty()
{
- assert(parent);
+ assert(parent || projected_parent);
version_t pv;
if (projected_parent)
pv = projected_parent->pre_dirty(get_projected_version());
inodeno_t ino() const { return inode.ino; }
inode_t& get_inode() { return inode; }
CDentry* get_parent_dn() { return parent; }
+ CDentry* get_projected_parent_dn() { return projected_parent ? projected_parent:parent; }
CDir *get_parent_dir();
CInode *get_parent_inode();
struct C_Locker_FileUpdate_finish : public Context {
Locker *locker;
CInode *in;
+ list<CInode*> nest_updates; // ancestor inodes passed on to Locker::file_update_finish() together with 'in'
LogSegment *ls;
bool share;
- C_Locker_FileUpdate_finish(Locker *l, CInode *i, LogSegment *s, bool e=false) :
+ C_Locker_FileUpdate_finish(Locker *l, CInode *i, LogSegment *s, list<CInode*> &ls, bool e=false) : // NOTE: the list parameter 'ls' shadows the LogSegment* member 'ls' inside the constructor body
locker(l), in(i), ls(s), share(e) {
+ nest_updates.swap(ls); // 'ls' is the list parameter here: takes over the caller's entries and leaves the caller's list empty
in->get(CInode::PIN_PTRWAITER);
}
void finish(int r) {
- locker->file_update_finish(in, ls, share);
+ locker->file_update_finish(in, ls, nest_updates, share); // here 'ls' is the LogSegment* member, not the constructor's list
}
};
-void Locker::file_update_finish(CInode *in, LogSegment *ls, bool share)
+void Locker::file_update_finish(CInode *in, LogSegment *ls, list<CInode*> &nest_updates, bool share)
{
dout(10) << "file_update_finish on " << *in << dendl;
in->pop_and_dirty_projected_inode(ls);
in->put(CInode::PIN_PTRWAITER);
+
+ for (list<CInode*>::iterator p = nest_updates.begin();
+ p != nest_updates.end();
+ p++) {
+ (*p)->pop_and_dirty_projected_inode(ls);
+ scatter_wrlock_finish(&(*p)->dirlock, 0);
+ }
+
file_wrlock_finish(&in->filelock);
if (share && in->is_auth() && in->filelock.is_stable())
share_inode_max_size(in);
pi->max_size = new_max;
EOpen *le = new EOpen(mds->mdlog);
le->metablob.add_dir_context(in->get_parent_dir());
+ list<CInode*> nest_updates;
+ predirty_nested(&le->metablob, in, nest_updates);
le->metablob.add_primary_dentry(in->parent, true, 0, pi);
LogSegment *ls = mds->mdlog->get_current_segment();
le->add_ino(in->ino());
ls->open_files.push_back(&in->xlist_open_file);
- mds->mdlog->submit_entry(le, new C_Locker_FileUpdate_finish(this, in, ls, true));
+ mds->mdlog->submit_entry(le, new C_Locker_FileUpdate_finish(this, in, ls, nest_updates, true));
file_wrlock_start(&in->filelock, forcewrlock); // wrlock for duration of journal
return true;
}
pi->time_warp_seq = m->get_time_warp_seq();
}
le->metablob.add_dir_context(in->get_parent_dir());
+ list<CInode*> nest_updates;
+ predirty_nested(&le->metablob, in, nest_updates);
le->metablob.add_primary_dentry(in->parent, true, 0, pi);
LogSegment *ls = mds->mdlog->get_current_segment();
- mds->mdlog->submit_entry(le, new C_Locker_FileUpdate_finish(this, in, ls, change_max));
+ mds->mdlog->submit_entry(le, new C_Locker_FileUpdate_finish(this, in, ls, nest_updates, change_max));
file_wrlock_start(&in->filelock); // wrlock for duration of journal
}
// nested ---------------------------------------------------------------
+void Locker::predirty_nested(EMetaBlob *blob, CInode *in, list<CInode*> &ls)
+{ /*
+ * Push the not-yet-accounted recursive stats of 'in' (bytes, file count,
+ * rctime) up through its ancestor directories, projecting an updated inode
+ * on each ancestor and adding it to 'blob'.
+ *
+ * blob: metablob that receives the dir context and the projected ancestors.
+ * in:   inode whose projected stats are the starting point.
+ * ls:   must be empty on entry; on return it holds every ancestor inode
+ *       whose dirlock was wrlocked here, nearest ancestor first.
+ *
+ * The wrlocks and projections taken here are left outstanding. Of the
+ * callers visible in this patch, Locker::file_update_finish pops and
+ * unlocks them itself, and Server::predirty_nested registers them with
+ * the MDRequest.
+ */
+ assert(ls.empty());
+
+ CDir *parent = in->get_projected_parent_dn()->get_dir(); // NOTE(review): the projected parent dentry is dereferenced unchecked; presumably callers never pass an inode without one -- confirm
+ blob->add_dir_context(parent);
+
+ // initial diff from *in: a directory reports nested minus accounted_nested; anything else counts as one file of curi->size bytes
+ inode_t *curi = in->get_projected_inode();
+ __u64 drbytes; // unsigned delta: wraps if the accounted value exceeds the current one; presumably the += below relies on modular arithmetic -- confirm field widths
+ __u64 drfiles;
+ utime_t rctime;
+ if (in->is_dir()) {
+ drbytes = curi->nested.rbytes - curi->accounted_nested.rbytes;
+ drfiles = curi->nested.rfiles - curi->accounted_nested.rfiles;
+ rctime = MAX(curi->ctime, curi->nested.rctime);
+ } else {
+ drbytes = curi->size - curi->accounted_nested.rbytes;
+ drfiles = 1 - curi->accounted_nested.rfiles;
+ rctime = curi->ctime;
+ }
+
+ dout(10) << "predirty_nested delta " << drbytes << " bytes / " << drfiles << " files from " << *in << dendl;
+
+ // build list of inodes to wrlock, dirty, and update (walks upward one directory per iteration)
+ CInode *cur = in;
+ while (parent) {
+ assert(cur->is_auth());
+ assert(parent->is_auth());
+
+ // opportunistically adjust parent dirfrag
+ CInode *pin = parent->get_inode();
+ // the delta is logged before the base-inode check, so the last log line may name an inode that is not updated
+ dout(10) << "predirty_nested delta " << drbytes << " bytes / " << drfiles << " files for " << *pin << dendl;
+ if (pin->is_base())
+ break;
+ // best effort: if the dirlock cannot be wrlocked right now, stop climbing; curi->accounted_nested is left untouched, so the delta stays pending
+ if (!scatter_wrlock_try(&pin->dirlock)) {
+ dout(10) << "predirty_nested can't wrlock " << pin->dirlock << " on " << *pin << dendl;
+ break;
+ }
+ // the wrlock is held from here on; record pin so the caller can finish it
+ ls.push_back(pin);
+
+ // FIXME: a non-auth pin is not handled yet. NOTE(review): at this point pin is already in ls with its dirlock wrlocked but nothing projected, so the break is only safe while the assert aborts.
+ if (!pin->is_auth()) {
+ assert(0);
+ break;
+ }
+
+ // project update: new version of pin's inode with the delta folded into its recursive totals
+ version_t ppv = pin->pre_dirty();
+ inode_t *pi = pin->project_inode();
+ pi->version = ppv;
+ pi->nested.rbytes += drbytes;
+ pi->nested.rfiles += drfiles;
+ pi->nested.rctime = rctime; // NOTE(review): plain assignment, not MAX() with the existing value -- confirm rctime cannot move backwards here
+ // apply the same delta to the per-fragment entry of the dirfrag we came up through
+ frag_t fg = parent->dirfrag().frag;
+ pin->dirfrag_nested[fg].rbytes += drbytes;
+ pin->dirfrag_nested[fg].rfiles += drfiles;
+ pin->dirfrag_nested[fg].rctime = rctime;
+ // the child's delta has been added to its parent, so record it as accounted on the child
+ curi->accounted_nested.rbytes += drbytes;
+ curi->accounted_nested.rfiles += drfiles;
+ curi->accounted_nested.rctime = rctime;
+ // step up one level
+ cur = pin;
+ curi = pi;
+ parent = cur->get_projected_parent_dn()->get_dir();
+ // next delta: whatever pin's projected inode has not yet pushed upward, including what was just added
+ drbytes = curi->nested.rbytes - curi->accounted_nested.rbytes;
+ drfiles = curi->nested.rfiles - curi->accounted_nested.rfiles;
+ rctime = MAX(curi->ctime, curi->nested.rctime);
+ }
+
+ // now, stick it in the blob: each collected ancestor is added as a primary dentry carrying its projected inode
+ for (list<CInode*>::iterator p = ls.begin();
+ p != ls.end();
+ p++) {
+ CInode *cur = *p;
+ inode_t *pi = cur->get_projected_inode();
+ blob->add_primary_dentry(cur->get_parent_dn(), true, 0, pi); // NOTE(review): uses get_parent_dn(), while the walk above used get_projected_parent_dn() -- confirm this is intended
+ }
+}
+
// locks ----------------------------------------------------------------
}
-bool Locker::scatter_wrlock_start(ScatterLock *lock, MDRequest *mdr)
+bool Locker::scatter_wrlock_try(ScatterLock *lock)
{
- dout(7) << "scatter_wrlock_start on " << *lock
- << " on " << *lock->get_parent() << dendl;
-
// pre-twiddle?
if (lock->get_parent()->is_auth() &&
!lock->get_parent()->is_replicated() &&
// can wrlock?
if (lock->can_wrlock()) {
lock->get_wrlock();
+ return true;
+ }
+
+ return false;
+}
+
+bool Locker::scatter_wrlock_start(ScatterLock *lock, MDRequest *mdr)
+{
+ dout(7) << "scatter_wrlock_start on " << *lock
+ << " on " << *lock->get_parent() << dendl;
+
+ if (scatter_wrlock_try(lock)) {
mdr->wrlocks.insert(lock);
mdr->locks.insert(lock);
return true;
void scatter_tempsync(ScatterLock *lock);
bool scatter_rdlock_start(ScatterLock *lock, MDRequest *mdr);
void scatter_rdlock_finish(ScatterLock *lock, MDRequest *mdr);
-public:
- bool scatter_wrlock_start(ScatterLock *lock, MDRequest *mdr); // public for Server's predirty_nested
-protected:
+ bool scatter_wrlock_try(ScatterLock *lock);
+ bool scatter_wrlock_start(ScatterLock *lock, MDRequest *mdr);
void scatter_wrlock_finish(ScatterLock *lock, MDRequest *mdr);
void scatter_writebehind(ScatterLock *lock);
};
void scatter_writebehind_finish(ScatterLock *lock, LogSegment *ls);
+public:
+ void predirty_nested(class EMetaBlob *blob, CInode *in, list<CInode*> &ls);
+
// local
protected:
bool local_wrlock_start(LocalLock *lock, MDRequest *mdr);
void request_inode_file_caps(CInode *in);
void handle_inode_file_caps(class MInodeFileCaps *m);
- void file_update_finish(CInode *in, LogSegment *ls, bool share);
+ void file_update_finish(CInode *in, LogSegment *ls, list<CInode*> &nest_updates, bool share);
public:
bool check_inode_max_size(CInode *in, bool forcewrlock=false);
private:
}
}
-void Server::predirty_nested(MDRequest *mdr, EMetaBlob *blob, CInode *in, CDir *parent)
+void Server::predirty_nested(MDRequest *mdr, EMetaBlob *blob, CInode *in) // request-path wrapper: delegates the walk to Locker::predirty_nested, then attaches the results to mdr
{
- if (!parent)
- parent = in->get_parent_dir();
-
- // initial diff from *in
- inode_t *curi = in->get_projected_inode();
- __u64 drbytes;
- __u64 drfiles;
- utime_t rctime;
- if (in->is_dir()) {
- drbytes = curi->nested.rbytes - curi->accounted_nested.rbytes;
- drfiles = curi->nested.rfiles - curi->accounted_nested.rfiles;
- rctime = MAX(curi->ctime, curi->nested.rctime);
- } else {
- drbytes = curi->size - curi->accounted_nested.rbytes;
- drfiles = 1 - curi->accounted_nested.rfiles;
- rctime = curi->ctime;
- }
-
- blob->add_dir_context(in->get_parent_dir());
-
- // build list of inodes to wrlock, dirty, and update
list<CInode*> ls;
- CInode *cur = in;
- while (parent) {
- assert(cur->is_auth());
- assert(parent->is_auth());
-
- // opportunistically adjust parent dirfrag
- CInode *pin = parent->get_inode();
- if (!pin->dirlock.can_wrlock()) {
- dout(10) << " can't wrlock " << pin->dirlock << " on " << *pin << dendl;
- break;
- }
- bool r = mds->locker->scatter_wrlock_start(&pin->dirlock, mdr);
- assert(r);
-
- if (!pin->is_auth()) {
- break;
- }
-
- // project update
- version_t ppv = pin->pre_dirty();
- inode_t *pi = pin->project_inode();
- pi->version = ppv;
- pi->nested.rbytes += drbytes;
- pi->nested.rfiles += drfiles;
- pi->nested.rctime = rctime;
- mdr->add_projected_inode(pin);
- ls.push_back(pin);
-
- frag_t fg = parent->dirfrag().frag;
- pin->dirfrag_nested[fg].rbytes += drbytes;
- pin->dirfrag_nested[fg].rfiles += drfiles;
- pin->dirfrag_nested[fg].rctime = rctime;
-
- curi->accounted_nested.rbytes += drbytes;
- curi->accounted_nested.rfiles += drfiles;
- curi->accounted_nested.rctime = rctime;
-
- cur = pin;
- curi = pi;
- parent = cur->get_parent_dir();
- }
+ mds->locker->predirty_nested(blob, in, ls); // fills ls with the ancestors it wrlocked; the blob entries are added there as well
- // now, stick it in the blob
for (list<CInode*>::iterator p = ls.begin();
p != ls.end();
p++) {
- CInode *cur = *p;
- inode_t *pi = cur->get_projected_inode();
- blob->add_primary_dentry(cur->get_parent_dn(), true, 0, pi);
+ SimpleLock *lock = &(*p)->dirlock; // the dirlock that Locker::predirty_nested wrlocked on this ancestor
+ mdr->wrlocks.insert(lock); // record the wrlock on the request; presumably request cleanup releases it -- confirm
+ mdr->locks.insert(lock);
+ mdr->add_projected_inode(*p); // matches the mdr->pop_and_dirty_projected_inodes() call added elsewhere in this patch
}
}
-
-
// ===============================================================================
// STAT
// dir inode's mtime
mds->server->dirty_dn_diri(mdr, dn, dirpv);
-
+ mdr->pop_and_dirty_projected_inodes();
+
// hit pop
mds->balancer->hit_inode(mdr->now, newi, META_POP_IWR);
//mds->balancer->hit_dir(mdr->now, dn->get_dir(), META_POP_DWR);
CInode *newi = prepare_new_inode(mdr, dn->dir);
assert(newi);
+ newi->projected_parent = dn;
newi->inode.rdev = req->head.args.mknod.rdev;
newi->inode.mode = req->head.args.mknod.mode;
if ((newi->inode.mode & S_IFMT) == 0)
le->metablob.add_allocated_ino(newi->ino(), mds->idalloc->get_version());
version_t dirpv = predirty_dn_diri(mdr, dn, &le->metablob); // dir mtime too
le->metablob.add_dir_context(dn->dir);
+ predirty_nested(mdr, &le->metablob, newi);
le->metablob.add_primary_dentry(dn, true, newi, &newi->inode);
// log + wait
assert(newi);
// it's a directory.
+ newi->projected_parent = dn;
newi->inode.mode = req->head.args.mkdir.mode;
newi->inode.mode &= ~S_IFMT;
newi->inode.mode |= S_IFDIR;
le->metablob.add_allocated_ino(newi->ino(), mds->idalloc->get_version());
version_t dirpv = predirty_dn_diri(mdr, dn, &le->metablob); // dir mtime too
le->metablob.add_dir_context(dn->dir);
+ predirty_nested(mdr, &le->metablob, newi);
le->metablob.add_primary_dentry(dn, true, newi, &newi->inode);
le->metablob.add_dir(newdir, true, true); // dirty AND complete
assert(newi);
// it's a symlink
+ newi->projected_parent = dn;
newi->inode.mode &= ~S_IFMT;
newi->inode.mode |= S_IFLNK;
newi->inode.mode |= 0777; // ?
le->metablob.add_allocated_ino(newi->ino(), mds->idalloc->get_version());
version_t dirpv = predirty_dn_diri(mdr, dn, &le->metablob); // dir mtime too
le->metablob.add_dir_context(dn->dir);
- predirty_nested(mdr, &le->metablob, newi, dn->dir);
+ predirty_nested(mdr, &le->metablob, newi);
le->metablob.add_primary_dentry(dn, true, newi, &newi->inode);
// log + wait
version_t predirty_dn_diri(MDRequest *mdr, CDentry *dn, class EMetaBlob *blob);
void dirty_dn_diri(MDRequest *mdr, CDentry *dn, version_t dirpv);
- void predirty_nested(MDRequest *mdr, EMetaBlob *blob, CInode *in, CDir *parent);
+ void predirty_nested(MDRequest *mdr, EMetaBlob *blob, CInode *in);
// requests on existing inodes.
// journaled?
// add parent dn
- CDentry *parent = diri->get_parent_dn();
+ CDentry *parent = diri->get_projected_parent_dn();
add_dir_context(parent->get_dir(), mode);
add_dentry(parent, false);
}