From 4c9987a0bb6f1e6360422fac8ae98ea172effc0d Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 3 Mar 2008 15:26:34 -0800 Subject: [PATCH] mds, client: fixed races with cap release, open, size/mtime update journaling, and file size - there's now an explicit TRUNC cap message when file size decreases - mds holds wrlock during duration of size|mtime journaling --- src/client/Client.cc | 44 ++++++++++++++++--------------- src/include/ceph_fs.h | 1 + src/mds/Locker.cc | 61 +++++++++++++++++++++++++++++-------------- src/mds/Locker.h | 2 ++ src/mds/Server.cc | 3 +++ 5 files changed, 70 insertions(+), 41 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index 904ffc9a65617..c4a07d7bc551f 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -398,12 +398,10 @@ Inode* Client::insert_inode(Dir *dir, InodeStat *st, const string& dname) dn->inode->mask = st->mask; // or do we have newer size/mtime from writing? - if (dn->inode->file_caps() & CEPH_CAP_WR) { - if (dn->inode->file_wr_size > dn->inode->inode.size) - dn->inode->inode.size = dn->inode->file_wr_size; - if (dn->inode->file_wr_mtime > dn->inode->inode.mtime) - dn->inode->inode.mtime = dn->inode->file_wr_mtime; - } + if (dn->inode->file_wr_size > dn->inode->inode.size) + dn->inode->inode.size = dn->inode->file_wr_size; + if (dn->inode->file_wr_mtime > dn->inode->inode.mtime) + dn->inode->inode.mtime = dn->inode->file_wr_mtime; // symlink? if (dn->inode->inode.is_symlink()) { @@ -1205,6 +1203,20 @@ void Client::handle_file_caps(MClientFileCaps *m) return; } + // truncate? + if (m->get_op() == CEPH_CAP_OP_TRUNC) { + dout(10) << "handle_file_caps TRUNC on ino " << in->ino() + << " size " << in->inode.size << " -> " << m->get_size() + << dendl; + // trim filecache? + if (g_conf.client_oc) + in->fc.truncate(in->inode.size, m->get_size()); + + in->inode.size = in->file_wr_size = m->get_size(); + delete m; + return; + } + // don't want? if (in->file_caps_wanted() == 0) { dout(5) << "handle_file_caps on ino " << m->get_ino() @@ -1230,19 +1242,6 @@ void Client::handle_file_caps(MClientFileCaps *m) << " caps now " << cap_string(new_caps) << " was " << cap_string(old_caps) << dendl; - // did file size decrease? - if ((old_caps & (CEPH_CAP_RD|CEPH_CAP_WR)) == 0 && - (new_caps & (CEPH_CAP_RD|CEPH_CAP_WR)) != 0 && - in->inode.size > (loff_t)m->get_size()) { - dout(10) << "*** file size decreased from " << in->inode.size << " to " << m->get_size() << dendl; - - // trim filecache? - if (g_conf.client_oc) - in->fc.truncate(in->inode.size, m->get_size()); - - in->inode.size = in->file_wr_size = m->get_size(); - } - // update inode in->inode.size = m->get_size(); // might have updated size... FIXME this is overkill! in->inode.mtime = m->get_mtime(); @@ -3589,8 +3588,11 @@ int Client::ll_getattr(inodeno_t ino, struct stat *attr) tout << ino.val << std::endl; Inode *in = _ll_get_inode(ino); - fill_stat(in, attr); - return 0; + filepath fpath("", in->ino()); + int res = _do_lstat(fpath, STAT_MASK_ALL, &in); + if (res == 0) + fill_stat(in, attr); + return res; } int Client::ll_setattr(inodeno_t ino, struct stat *attr, int mask) diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index c72475adc2771..c558d7f2cd766 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -457,6 +457,7 @@ enum { CEPH_CAP_OP_GRANT, /* mds->client grant */ CEPH_CAP_OP_ACK, /* client->mds ack (if prior grant was a recall) */ CEPH_CAP_OP_REQUEST, /* client->mds request (update wanted bits) */ + CEPH_CAP_OP_TRUNC, /* mds->client trunc notify (invalidate size+mtime) */ CEPH_CAP_OP_EXPORT, /* mds has exported the cap */ CEPH_CAP_OP_IMPORT /* mds has imported the cap from specified mds */ }; diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 56e06d2f7607d..07972bb09798e 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -456,14 +456,20 @@ struct C_Locker_FileUpdate_finish : public Context { in->get(CInode::PIN_PTRWAITER); } void finish(int r) { - in->pop_and_dirty_projected_inode(ls); - in->put(CInode::PIN_PTRWAITER); - locker->file_wrlock_finish(&in->filelock); - if (share && in->is_auth() && in->filelock.is_stable()) - locker->share_new_file_max(in); + locker->file_update_finish(in, ls, share); } }; +void Locker::file_update_finish(CInode *in, LogSegment *ls, bool share) +{ + dout(10) << "file_update_finish on " << *in << dendl; + in->pop_and_dirty_projected_inode(ls); + in->put(CInode::PIN_PTRWAITER); + file_wrlock_finish(&in->filelock); + if (share && in->is_auth() && in->filelock.is_stable()) + share_new_file_max(in); +} + Capability* Locker::issue_new_caps(CInode *in, int mode, Session *session) @@ -543,7 +549,6 @@ Capability* Locker::issue_new_caps(CInode *in, } - bool Locker::issue_caps(CInode *in) { // allowed caps are determined by the lock mode. @@ -592,6 +597,23 @@ bool Locker::issue_caps(CInode *in) return (nissued == 0); // true if no re-issued, no callbacks } +void Locker::issue_truncate(CInode *in) +{ + dout(7) << "issue_truncate on " << *in << dendl; + + for (map::iterator it = in->client_caps.begin(); + it != in->client_caps.end(); + it++) { + Capability *cap = it->second; + mds->send_message_client(new MClientFileCaps(CEPH_CAP_OP_TRUNC, + in->inode, + cap->get_last_seq(), + cap->pending(), + cap->wanted()), + it->first); + } +} + void Locker::revoke_stale_caps(Session *session) { dout(10) << "revoke_stale_caps for " << session->inst.name << dendl; @@ -836,12 +858,16 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) inode_t *latest = in->get_projected_inode(); // no more writers? - int wanted = in->get_caps_wanted(); bool no_wr = false; if (latest->max_size && (wanted & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER)) == 0) no_wr = true; + utime_t atime = m->get_atime(); + utime_t mtime = m->get_mtime(); + off_t size = m->get_size(); + // atime|mtime|size? + bool had_or_has_wr = (had|has) & CEPH_CAP_WR; bool dirty = false; if (atime > latest->atime) dirty = true; @@ -866,11 +892,6 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) !in->is_base()) { // FIXME.. what about root inode mtime/atime? EUpdate *le = new EUpdate(mds->mdlog, "size|max_size|mtime|atime update"); inode_t *pi = in->project_inode(); - /* - * FIXME HACK: set current inode too, until we get - * FileLock to grab a reference here or some such - * thing... - */ pi->version = in->pre_dirty(); if (no_wr) { dout(7) << " last wr-wanted cap, max_size=0" << dendl; @@ -879,22 +900,22 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) int64_t inc = in->get_layout_size_increment(); int64_t new_max = ROUND_UP_TO(latest->size + inc, inc); dout(7) << " increasing max_size " << pi->max_size << " to " << new_max << dendl; - in->inode.max_size = pi->max_size = new_max; + pi->max_size = new_max; } if (mtime > latest->mtime) { dout(7) << " taking mtime " << mtime << " > " << in->inode.mtime << " for " << *in << dendl; - in->inode.mtime = pi->mtime = mtime; + pi->mtime = mtime; } if (size > latest->size) { dout(7) << " taking size " << size << " > " << in->inode.size << " for " << *in << dendl; - in->inode.size = pi->size = size; + pi->size = size; } if (atime > latest->atime) { dout(7) << " taking atime " << atime << " > " << in->inode.atime << " for " << *in << dendl; - in->inode.atime = pi->atime = atime; + pi->atime = atime; } le->metablob.add_dir_context(in->get_parent_dir()); le->metablob.add_primary_dentry(in->parent, true, 0, pi); @@ -909,8 +930,6 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) else if (in->is_auth()) file_eval(&in->filelock); - //in->finish_waiting(CInode::WAIT_CAPS, 0); // note: any users for this? - delete m; } @@ -2654,7 +2673,8 @@ bool Locker::file_sync(FileLock *lock) else if (lock->get_state() == LOCK_MIXED) { // writers? - if (issued & CEPH_CAP_WR) { + if ((issued & CEPH_CAP_WR) || + lock->is_wrlocked()) { // gather client write caps lock->set_state(LOCK_GSYNCM); lock->get_parent()->auth_pin(); @@ -2675,7 +2695,8 @@ bool Locker::file_sync(FileLock *lock) else if (lock->get_state() == LOCK_LONER) { // writers? - if (issued & CEPH_CAP_WR) { + if ((issued & CEPH_CAP_WR) || + lock->is_wrlocked()) { // gather client write caps lock->set_state(LOCK_GSYNCL); lock->get_parent()->auth_pin(); diff --git a/src/mds/Locker.h b/src/mds/Locker.h index b72318815c1c4..7e0fcde9dc4e4 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -183,6 +183,7 @@ protected: version_t issue_file_data_version(CInode *in); Capability* issue_new_caps(CInode *in, int mode, Session *session); bool issue_caps(CInode *in); + void issue_truncate(CInode *in); void revoke_stale_caps(Session *session); void resume_stale_caps(Session *session); @@ -192,6 +193,7 @@ protected: void request_inode_file_caps(CInode *in); void handle_inode_file_caps(class MInodeFileCaps *m); + void file_update_finish(CInode *in, LogSegment *ls, bool share); void share_new_file_max(CInode *in); friend class C_MDL_RequestInodeFileCaps; diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 628b509aed7ea..3ed57f89d0b1a 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -3749,6 +3749,9 @@ public: in->inode.mtime = ctime; in->pop_and_dirty_projected_inode(mdr->ls); + // notify any clients + mds->locker->issue_truncate(in); + // purge mds->mdcache->purge_inode(in, size, in->inode.size, mdr->ls); mds->mdcache->wait_for_purge(in, size, -- 2.39.5