From: Sage Weil Date: Thu, 22 May 2008 22:23:39 +0000 (-0700) Subject: mds: move predirty_nested into locker, and update with file size X-Git-Tag: v0.3~170^2~88 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=076d902c5e90369dc193296d5f06ae9f47e6fa26;p=ceph.git mds: move predirty_nested into locker, and update with file size --- diff --git a/src/client/Client.cc b/src/client/Client.cc index 09bb2d813cb..7590f1efa4c 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -2244,7 +2244,10 @@ int Client::fill_stat(Inode *in, struct stat *st) st->st_ctime = MAX(in->inode.ctime, in->inode.mtime); st->st_atime = in->inode.atime; st->st_mtime = in->inode.mtime; - st->st_size = in->inode.size; + if (in->inode.is_dir()) + st->st_size = in->inode.nested.rbytes; + else + st->st_size = in->inode.size; st->st_blksize = MAX(ceph_file_layout_su(in->inode.layout), 4096); st->st_blocks = in->inode.size ? DIV_ROUND_UP(in->inode.size, st->st_blksize):0; return in->lease_mask; diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 93a460c45e9..0d826621646 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -70,6 +70,9 @@ ostream& operator<<(ostream& out, CInode& in) if (in.is_freezing_inode()) out << " FREEZING=" << in.auth_pin_freeze_allowance; if (in.is_frozen_inode()) out << " FROZEN"; + out << " s=" << in.inode.size; + out << " rb=" << in.inode.nested.rbytes << "/" << in.inode.accounted_nested.rbytes; + // locks out << " " << in.authlock; out << " " << in.linklock; @@ -409,7 +412,7 @@ void CInode::name_stray_dentry(string& dname) version_t CInode::pre_dirty() { - assert(parent); + assert(parent || projected_parent); version_t pv; if (projected_parent) pv = projected_parent->pre_dirty(get_projected_version()); diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 8ac19a921c9..5fe837c0a1a 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -287,6 +287,7 @@ public: inodeno_t ino() const { return inode.ino; } inode_t& get_inode() { return inode; } CDentry* get_parent_dn() { return parent; } + CDentry* get_projected_parent_dn() { return projected_parent ? projected_parent:parent; } CDir *get_parent_dir(); CInode *get_parent_inode(); diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index ae0d83edb0c..459118aedac 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -471,22 +471,32 @@ version_t Locker::issue_file_data_version(CInode *in) struct C_Locker_FileUpdate_finish : public Context { Locker *locker; CInode *in; + list nest_updates; LogSegment *ls; bool share; - C_Locker_FileUpdate_finish(Locker *l, CInode *i, LogSegment *s, bool e=false) : + C_Locker_FileUpdate_finish(Locker *l, CInode *i, LogSegment *s, list &ls, bool e=false) : locker(l), in(i), ls(s), share(e) { + nest_updates.swap(ls); in->get(CInode::PIN_PTRWAITER); } void finish(int r) { - locker->file_update_finish(in, ls, share); + locker->file_update_finish(in, ls, nest_updates, share); } }; -void Locker::file_update_finish(CInode *in, LogSegment *ls, bool share) +void Locker::file_update_finish(CInode *in, LogSegment *ls, list &nest_updates, bool share) { dout(10) << "file_update_finish on " << *in << dendl; in->pop_and_dirty_projected_inode(ls); in->put(CInode::PIN_PTRWAITER); + + for (list::iterator p = nest_updates.begin(); + p != nest_updates.end(); + p++) { + (*p)->pop_and_dirty_projected_inode(ls); + scatter_wrlock_finish(&(*p)->dirlock, 0); + } + file_wrlock_finish(&in->filelock); if (share && in->is_auth() && in->filelock.is_stable()) share_inode_max_size(in); @@ -842,11 +852,13 @@ bool Locker::check_inode_max_size(CInode *in, bool forcewrlock) pi->max_size = new_max; EOpen *le = new EOpen(mds->mdlog); le->metablob.add_dir_context(in->get_parent_dir()); + list nest_updates; + predirty_nested(&le->metablob, in, nest_updates); le->metablob.add_primary_dentry(in->parent, true, 0, pi); LogSegment *ls = mds->mdlog->get_current_segment(); le->add_ino(in->ino()); ls->open_files.push_back(&in->xlist_open_file); - mds->mdlog->submit_entry(le, new C_Locker_FileUpdate_finish(this, in, ls, true)); + mds->mdlog->submit_entry(le, new C_Locker_FileUpdate_finish(this, in, ls, nest_updates, true)); file_wrlock_start(&in->filelock, forcewrlock); // wrlock for duration of journal return true; } @@ -1036,9 +1048,11 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) pi->time_warp_seq = m->get_time_warp_seq(); } le->metablob.add_dir_context(in->get_parent_dir()); + list nest_updates; + predirty_nested(&le->metablob, in, nest_updates); le->metablob.add_primary_dentry(in->parent, true, 0, pi); LogSegment *ls = mds->mdlog->get_current_segment(); - mds->mdlog->submit_entry(le, new C_Locker_FileUpdate_finish(this, in, ls, change_max)); + mds->mdlog->submit_entry(le, new C_Locker_FileUpdate_finish(this, in, ls, nest_updates, change_max)); file_wrlock_start(&in->filelock); // wrlock for duration of journal } @@ -1209,6 +1223,92 @@ void Locker::revoke_client_leases(SimpleLock *lock) // nested --------------------------------------------------------------- +void Locker::predirty_nested(EMetaBlob *blob, CInode *in, list &ls) +{ + assert(ls.empty()); + + CDir *parent = in->get_projected_parent_dn()->get_dir(); + blob->add_dir_context(parent); + + // initial diff from *in + inode_t *curi = in->get_projected_inode(); + __u64 drbytes; + __u64 drfiles; + utime_t rctime; + if (in->is_dir()) { + drbytes = curi->nested.rbytes - curi->accounted_nested.rbytes; + drfiles = curi->nested.rfiles - curi->accounted_nested.rfiles; + rctime = MAX(curi->ctime, curi->nested.rctime); + } else { + drbytes = curi->size - curi->accounted_nested.rbytes; + drfiles = 1 - curi->accounted_nested.rfiles; + rctime = curi->ctime; + } + + dout(10) << "predirty_nested delta " << drbytes << " bytes / " << drfiles << " files from " << *in << dendl; + + // build list of inodes to wrlock, dirty, and update + CInode *cur = in; + while (parent) { + assert(cur->is_auth()); + assert(parent->is_auth()); + + // opportunistically adjust parent dirfrag + CInode *pin = parent->get_inode(); + + dout(10) << "predirty_nested delta " << drbytes << " bytes / " << drfiles << " files for " << *pin << dendl; + if (pin->is_base()) + break; + + if (!scatter_wrlock_try(&pin->dirlock)) { + dout(10) << "predirty_nested can't wrlock " << pin->dirlock << " on " << *pin << dendl; + break; + } + + ls.push_back(pin); + + // FIXME + if (!pin->is_auth()) { + assert(0); + break; + } + + // project update + version_t ppv = pin->pre_dirty(); + inode_t *pi = pin->project_inode(); + pi->version = ppv; + pi->nested.rbytes += drbytes; + pi->nested.rfiles += drfiles; + pi->nested.rctime = rctime; + + frag_t fg = parent->dirfrag().frag; + pin->dirfrag_nested[fg].rbytes += drbytes; + pin->dirfrag_nested[fg].rfiles += drfiles; + pin->dirfrag_nested[fg].rctime = rctime; + + curi->accounted_nested.rbytes += drbytes; + curi->accounted_nested.rfiles += drfiles; + curi->accounted_nested.rctime = rctime; + + cur = pin; + curi = pi; + parent = cur->get_projected_parent_dn()->get_dir(); + + drbytes = curi->nested.rbytes - curi->accounted_nested.rbytes; + drfiles = curi->nested.rfiles - curi->accounted_nested.rfiles; + rctime = MAX(curi->ctime, curi->nested.rctime); + } + + // now, stick it in the blob + for (list::iterator p = ls.begin(); + p != ls.end(); + p++) { + CInode *cur = *p; + inode_t *pi = cur->get_projected_inode(); + blob->add_primary_dentry(cur->get_parent_dn(), true, 0, pi); + } +} + // locks ---------------------------------------------------------------- @@ -1791,11 +1891,8 @@ void Locker::scatter_rdlock_finish(ScatterLock *lock, MDRequest *mdr) } -bool Locker::scatter_wrlock_start(ScatterLock *lock, MDRequest *mdr) +bool Locker::scatter_wrlock_try(ScatterLock *lock) { - dout(7) << "scatter_wrlock_start on " << *lock - << " on " << *lock->get_parent() << dendl; - // pre-twiddle? if (lock->get_parent()->is_auth() && !lock->get_parent()->is_replicated() && @@ -1809,6 +1906,18 @@ bool Locker::scatter_wrlock_start(ScatterLock *lock, MDRequest *mdr) // can wrlock? if (lock->can_wrlock()) { lock->get_wrlock(); + return true; + } + + return false; +} + +bool Locker::scatter_wrlock_start(ScatterLock *lock, MDRequest *mdr) +{ + dout(7) << "scatter_wrlock_start on " << *lock + << " on " << *lock->get_parent() << dendl; + + if (scatter_wrlock_try(lock)) { mdr->wrlocks.insert(lock); mdr->locks.insert(lock); return true; diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 4195f4a25a4..0c09a348d5d 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -135,9 +135,8 @@ protected: void scatter_tempsync(ScatterLock *lock); bool scatter_rdlock_start(ScatterLock *lock, MDRequest *mdr); void scatter_rdlock_finish(ScatterLock *lock, MDRequest *mdr); -public: - bool scatter_wrlock_start(ScatterLock *lock, MDRequest *mdr); // public for Server's predirty_nested -protected: + bool scatter_wrlock_try(ScatterLock *lock); + bool scatter_wrlock_start(ScatterLock *lock, MDRequest *mdr); void scatter_wrlock_finish(ScatterLock *lock, MDRequest *mdr); void scatter_writebehind(ScatterLock *lock); @@ -153,6 +152,9 @@ protected: }; void scatter_writebehind_finish(ScatterLock *lock, LogSegment *ls); +public: + void predirty_nested(class EMetaBlob *blob, CInode *in, list &ls); + // local protected: bool local_wrlock_start(LocalLock *lock, MDRequest *mdr); @@ -197,7 +199,7 @@ protected: void request_inode_file_caps(CInode *in); void handle_inode_file_caps(class MInodeFileCaps *m); - void file_update_finish(CInode *in, LogSegment *ls, bool share); + void file_update_finish(CInode *in, LogSegment *ls, list &nest_updates, bool share); public: bool check_inode_max_size(CInode *in, bool forcewrlock=false); private: diff --git a/src/mds/Server.cc b/src/mds/Server.cc index febbe399c45..33fde1bec8d 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -1539,86 +1539,23 @@ void Server::dirty_dn_diri(MDRequest *mdr, CDentry *dn, version_t dirpv) } } -void Server::predirty_nested(MDRequest *mdr, EMetaBlob *blob, CInode *in, CDir *parent) +void Server::predirty_nested(MDRequest *mdr, EMetaBlob *blob, CInode *in) { - if (!parent) - parent = in->get_parent_dir(); - - // initial diff from *in - inode_t *curi = in->get_projected_inode(); - __u64 drbytes; - __u64 drfiles; - utime_t rctime; - if (in->is_dir()) { - drbytes = curi->nested.rbytes - curi->accounted_nested.rbytes; - drfiles = curi->nested.rfiles - curi->accounted_nested.rfiles; - rctime = MAX(curi->ctime, curi->nested.rctime); - } else { - drbytes = curi->size - curi->accounted_nested.rbytes; - drfiles = 1 - curi->accounted_nested.rfiles; - rctime = curi->ctime; - } - - blob->add_dir_context(in->get_parent_dir()); - - // build list of inodes to wrlock, dirty, and update list ls; - CInode *cur = in; - while (parent) { - assert(cur->is_auth()); - assert(parent->is_auth()); - - // opportunistically adjust parent dirfrag - CInode *pin = parent->get_inode(); - if (!pin->dirlock.can_wrlock()) { - dout(10) << " can't wrlock " << pin->dirlock << " on " << *pin << dendl; - break; - } - bool r = mds->locker->scatter_wrlock_start(&pin->dirlock, mdr); - assert(r); - - if (!pin->is_auth()) { - break; - } - - // project update - version_t ppv = pin->pre_dirty(); - inode_t *pi = pin->project_inode(); - pi->version = ppv; - pi->nested.rbytes += drbytes; - pi->nested.rfiles += drfiles; - pi->nested.rctime = rctime; - mdr->add_projected_inode(pin); - ls.push_back(pin); - - frag_t fg = parent->dirfrag().frag; - pin->dirfrag_nested[fg].rbytes += drbytes; - pin->dirfrag_nested[fg].rfiles += drfiles; - pin->dirfrag_nested[fg].rctime = rctime; - - curi->accounted_nested.rbytes += drbytes; - curi->accounted_nested.rfiles += drfiles; - curi->accounted_nested.rctime = rctime; - - cur = pin; - curi = pi; - parent = cur->get_parent_dir(); - } + mds->locker->predirty_nested(blob, in, ls); - // now, stick it in the blob for (list::iterator p = ls.begin(); p != ls.end(); p++) { - CInode *cur = *p; - inode_t *pi = cur->get_projected_inode(); - blob->add_primary_dentry(cur->get_parent_dn(), true, 0, pi); + SimpleLock *lock = &(*p)->dirlock; + mdr->wrlocks.insert(lock); + mdr->locks.insert(lock); + mdr->add_projected_inode(*p); } } - - // =============================================================================== // STAT @@ -2092,7 +2029,8 @@ public: // dir inode's mtime mds->server->dirty_dn_diri(mdr, dn, dirpv); - + mdr->pop_and_dirty_projected_inodes(); + // hit pop mds->balancer->hit_inode(mdr->now, newi, META_POP_IWR); //mds->balancer->hit_dir(mdr->now, dn->get_dir(), META_POP_DWR); @@ -2116,6 +2054,7 @@ void Server::handle_client_mknod(MDRequest *mdr) CInode *newi = prepare_new_inode(mdr, dn->dir); assert(newi); + newi->projected_parent = dn; newi->inode.rdev = req->head.args.mknod.rdev; newi->inode.mode = req->head.args.mknod.mode; if ((newi->inode.mode & S_IFMT) == 0) @@ -2131,6 +2070,7 @@ void Server::handle_client_mknod(MDRequest *mdr) le->metablob.add_allocated_ino(newi->ino(), mds->idalloc->get_version()); version_t dirpv = predirty_dn_diri(mdr, dn, &le->metablob); // dir mtime too le->metablob.add_dir_context(dn->dir); + predirty_nested(mdr, &le->metablob, newi); le->metablob.add_primary_dentry(dn, true, newi, &newi->inode); // log + wait @@ -2154,6 +2094,7 @@ void Server::handle_client_mkdir(MDRequest *mdr) assert(newi); // it's a directory. + newi->projected_parent = dn; newi->inode.mode = req->head.args.mkdir.mode; newi->inode.mode &= ~S_IFMT; newi->inode.mode |= S_IFDIR; @@ -2174,6 +2115,7 @@ void Server::handle_client_mkdir(MDRequest *mdr) le->metablob.add_allocated_ino(newi->ino(), mds->idalloc->get_version()); version_t dirpv = predirty_dn_diri(mdr, dn, &le->metablob); // dir mtime too le->metablob.add_dir_context(dn->dir); + predirty_nested(mdr, &le->metablob, newi); le->metablob.add_primary_dentry(dn, true, newi, &newi->inode); le->metablob.add_dir(newdir, true, true); // dirty AND complete @@ -2211,6 +2153,7 @@ void Server::handle_client_symlink(MDRequest *mdr) assert(newi); // it's a symlink + newi->projected_parent = dn; newi->inode.mode &= ~S_IFMT; newi->inode.mode |= S_IFLNK; newi->inode.mode |= 0777; // ? @@ -2225,7 +2168,7 @@ void Server::handle_client_symlink(MDRequest *mdr) le->metablob.add_allocated_ino(newi->ino(), mds->idalloc->get_version()); version_t dirpv = predirty_dn_diri(mdr, dn, &le->metablob); // dir mtime too le->metablob.add_dir_context(dn->dir); - predirty_nested(mdr, &le->metablob, newi, dn->dir); + predirty_nested(mdr, &le->metablob, newi); le->metablob.add_primary_dentry(dn, true, newi, &newi->inode); // log + wait diff --git a/src/mds/Server.h b/src/mds/Server.h index c28a7726f2e..840492aae27 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -102,7 +102,7 @@ public: version_t predirty_dn_diri(MDRequest *mdr, CDentry *dn, class EMetaBlob *blob); void dirty_dn_diri(MDRequest *mdr, CDentry *dn, version_t dirpv); - void predirty_nested(MDRequest *mdr, EMetaBlob *blob, CInode *in, CDir *parent); + void predirty_nested(MDRequest *mdr, EMetaBlob *blob, CInode *in); // requests on existing inodes. diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h index 85974474e76..efa266fb1b6 100644 --- a/src/mds/events/EMetaBlob.h +++ b/src/mds/events/EMetaBlob.h @@ -465,7 +465,7 @@ private: // journaled? // add parent dn - CDentry *parent = diri->get_parent_dn(); + CDentry *parent = diri->get_projected_parent_dn(); add_dir_context(parent->get_dir(), mode); add_dentry(parent, false); }