From: Sage Weil Date: Thu, 29 Jan 2009 22:47:31 +0000 (-0800) Subject: mds: truncate changes. not complete. X-Git-Tag: v0.7~248 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=e0dcc7cf496bef4a10cc142a3f8a5d8fd1aaa84c;p=ceph.git mds: truncate changes. not complete. We don't clean up after we finish our truncate. --- diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 9f7edef839fc..7a1fe309c06a 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -113,15 +113,16 @@ class CInode : public MDSCacheObject { static const __u64 WAIT_ANCHORED = (1<<1); static const __u64 WAIT_UNANCHORED = (1<<2); static const __u64 WAIT_FROZEN = (1<<3); + static const __u64 WAIT_TRUNC = (1<<4); - static const int WAIT_AUTHLOCK_OFFSET = 4; - static const int WAIT_LINKLOCK_OFFSET = 4 + SimpleLock::WAIT_BITS; - static const int WAIT_DIRFRAGTREELOCK_OFFSET = 4 + 2*SimpleLock::WAIT_BITS; - static const int WAIT_FILELOCK_OFFSET = 4 + 3*SimpleLock::WAIT_BITS; - static const int WAIT_VERSIONLOCK_OFFSET = 4 + 4*SimpleLock::WAIT_BITS; - static const int WAIT_XATTRLOCK_OFFSET = 4 + 5*SimpleLock::WAIT_BITS; - static const int WAIT_SNAPLOCK_OFFSET = 4 + 6*SimpleLock::WAIT_BITS; - static const int WAIT_NESTLOCK_OFFSET = 4 + 7*SimpleLock::WAIT_BITS; + static const int WAIT_AUTHLOCK_OFFSET = 5; + static const int WAIT_LINKLOCK_OFFSET = 5 + SimpleLock::WAIT_BITS; + static const int WAIT_DIRFRAGTREELOCK_OFFSET = 5 + 2*SimpleLock::WAIT_BITS; + static const int WAIT_FILELOCK_OFFSET = 5 + 3*SimpleLock::WAIT_BITS; + static const int WAIT_VERSIONLOCK_OFFSET = 5 + 4*SimpleLock::WAIT_BITS; + static const int WAIT_XATTRLOCK_OFFSET = 5 + 5*SimpleLock::WAIT_BITS; + static const int WAIT_SNAPLOCK_OFFSET = 5 + 6*SimpleLock::WAIT_BITS; + static const int WAIT_NESTLOCK_OFFSET = 5 + 7*SimpleLock::WAIT_BITS; static const __u64 WAIT_ANY_MASK = (__u64)(-1); diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 78bbdc0bf2ea..e4f19269b85e 100644 --- a/src/mds/MDCache.cc +++ 
b/src/mds/MDCache.cc @@ -4078,6 +4078,31 @@ void MDCache::set_root(CInode *in) +void MDCache::truncate_inode(CInode *in, LogSegment *ls) +{ + inode_t *pi = in->get_projected_inode(); + dout(10) << "truncate_inode " << pi->truncate_from << " -> " << pi->truncate_size + << " on " << *in + << dendl; + + SnapRealm *realm = in->find_snaprealm(); + SnapContext nullsnap; + const SnapContext *snapc; + if (realm) { + dout(10) << " realm " << *realm << dendl; + snapc = &realm->get_snap_context(); + } else { + dout(10) << " NO realm, using null context" << dendl; + snapc = &nullsnap; + assert(in->last == CEPH_NOSNAP); + } + dout(10) << "truncate_inode snapc " << snapc << " on " << *in << dendl; + mds->filer->truncate(in->inode.ino, &in->inode.layout, *snapc, + pi->truncate_size, pi->truncate_from-pi->truncate_size, pi->truncate_seq, 0, + 0, 0);//new C_MDC_PurgeFinish(this, in, newsize, oldsize)); + + +} // ************** // Inode purging -- reliably removing deleted file's objects @@ -4106,7 +4131,7 @@ public: }; /* purge_inode in - * will be called by on unlink or rmdir or truncate or purge + * will be called by on unlink or rmdir or purge * caller responsible for journaling a matching EUpdate */ void MDCache::purge_inode(CInode *in, loff_t newsize, loff_t oldsize, LogSegment *ls) diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 956da55c62e9..b78ecdeae397 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -823,6 +823,9 @@ public: void rename_file(CDentry *srcdn, CDentry *destdn); public: + // truncate + void truncate_inode(CInode *in, LogSegment *ls); + // inode purging void purge_inode(CInode *in, loff_t newsize, loff_t oldsize, LogSegment *ls); void _do_purge_inode(CInode *in, loff_t newsize, loff_t oldsize); diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 892796a25b1b..ba3fd9cefbb4 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -1401,7 +1401,9 @@ CInode* Server::prepare_new_inode(MDRequest *mdr, CDir *dir, inodeno_t useino) 
in->inode.version = 1; in->inode.nlink = 1; // FIXME in->inode.layout = g_default_file_layout; - + + in->inode.truncate_size = -1ull; // not truncated, yet! + in->inode.uid = mdr->client_request->get_caller_uid(); in->inode.gid = mdr->client_request->get_caller_gid(); in->inode.ctime = in->inode.mtime = in->inode.atime = mdr->now; // now @@ -4768,15 +4770,13 @@ void Server::handle_slave_rename_prep_ack(MDRequest *mdr, MMDSSlaveRequest *ack) // =================================== // TRUNCATE, FSYNC -class C_MDS_truncate_purged : public Context { +struct DelayTrunc : public Context { MDS *mds; - MDRequest *mdr; -public: - C_MDS_truncate_purged(MDS *m, MDRequest *r) : - mds(m), mdr(r) {} + CInode *in; + LogSegment *ls; + DelayTrunc(MDS *m, CInode *i, LogSegment *l) : mds(m), in(i), ls(l) {} void finish(int r) { - assert(r == 0); - mds->server->reply_request(mdr, 0); + mds->mdcache->truncate_inode(in, ls); } }; @@ -4790,24 +4790,18 @@ public: void finish(int r) { assert(r == 0); - // apply to cache - __u64 old_size = in->inode.size; + // apply in->pop_and_dirty_projected_inode(mdr->ls); - mdr->apply(); // notify any clients mds->locker->issue_truncate(in); + //mds->mdcache->truncate_inode(in, mdr->ls); + mds->timer.add_event_after(10.0, new DelayTrunc(mds, in, mdr->ls)); - if (old_size <= in->inode.size) { - // forward truncate. done! 
- mds->server->reply_request(mdr, 0); - } else { - // purge - mds->mdcache->purge_inode(in, in->inode.size, old_size, mdr->ls); - mds->mdcache->wait_for_purge(in, in->inode.size, - new C_MDS_truncate_purged(mds, mdr)); - } + mds->balancer->hit_inode(mdr->now, in, META_POP_IWR); + + mds->server->reply_request(mdr, 0); } }; @@ -4833,39 +4827,55 @@ void Server::handle_client_truncate(MDRequest *mdr) set rdlocks = mdr->rdlocks; set wrlocks = mdr->wrlocks; set xlocks = mdr->xlocks; - xlocks.insert(&cur->filelock); + wrlocks.insert(&cur->filelock); mds->locker->include_snap_rdlocks(rdlocks, cur); if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks)) return; // already the correct size? - if (cur->inode.size == req->head.args.truncate.length) { + inode_t *pi = cur->get_projected_inode(); + __u64 old_size = MAX(pi->size, req->head.args.truncate.old_length); + if (old_size == req->head.args.truncate.length) { reply_request(mdr, 0); return; } + if (old_size > req->head.args.truncate.length && pi->is_truncating()) { + dout(10) << " waiting for pending truncate from " << pi->truncate_from + << " to " << pi->truncate_size << " to complete on " << *cur << dendl; + cur->add_waiter(CInode::WAIT_TRUNC, new C_MDS_RetryRequest(mdcache, mdr)); + return; + } + // prepare version_t pdv = cur->pre_dirty(); utime_t ctime = g_clock.real_now(); - Context *fin = new C_MDS_truncate_logged(mds, mdr, cur); - - // log + wait + mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "truncate"); le->metablob.add_client_req(mdr->reqid); le->metablob.add_inode_truncate(cur->ino(), req->head.args.truncate.length, cur->inode.size); - inode_t *pi = cur->project_inode(); + pi = cur->project_inode(); pi->mtime = ctime; pi->ctime = ctime; pi->version = pdv; - pi->size = req->head.args.truncate.length; - pi->rstat.rbytes = pi->size; - pi->truncate_seq++; + if (old_size > req->head.args.truncate.length) { + // truncate to smaller size + pi->truncate_from = old_size; + pi->size = 
req->head.args.truncate.length; + pi->rstat.rbytes = pi->size; + pi->truncate_size = pi->size; + pi->truncate_seq++; + } else { + // truncate to larger size + pi->size = req->head.args.truncate.length; + pi->rstat.rbytes = pi->size; + } mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY, false); mdcache->journal_dirty_inode(mdr, &le->metablob, cur); - mdlog->submit_entry(le, fin); + journal_and_reply(mdr, cur, 0, le, new C_MDS_truncate_logged(mds, mdr, cur)); } diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index d2410d35dc32..c6232efaa710 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -295,7 +295,7 @@ struct inode_t { uint64_t size; // on directory, # dentries uint64_t max_size; // client(s) are auth to write this much... uint32_t truncate_seq; - uint64_t truncate_size; + uint64_t truncate_size, truncate_from; utime_t mtime; // file data modify time. utime_t atime; // file data access time. uint32_t time_warp_seq; // count of (potential) mtime/atime timewarps (i.e., utimes()) @@ -314,6 +314,8 @@ struct inode_t { bool is_dir() const { return (mode & S_IFMT) == S_IFDIR; } bool is_file() const { return (mode & S_IFMT) == S_IFREG; } + bool is_truncating() const { return truncate_size != -1ull; } + void encode(bufferlist &bl) const { ::encode(ino, bl); ::encode(rdev, bl); @@ -331,6 +333,7 @@ struct inode_t { ::encode(max_size, bl); ::encode(truncate_seq, bl); ::encode(truncate_size, bl); + ::encode(truncate_from, bl); ::encode(mtime, bl); ::encode(atime, bl); ::encode(time_warp_seq, bl); @@ -360,6 +363,7 @@ struct inode_t { ::decode(max_size, p); ::decode(truncate_seq, p); ::decode(truncate_size, p); + ::decode(truncate_from, p); ::decode(mtime, p); ::decode(atime, p); ::decode(time_warp_seq, p); diff --git a/src/osdc/Filer.h b/src/osdc/Filer.h index 7073a26cad71..2c1d4f500dbb 100644 --- a/src/osdc/Filer.h +++ b/src/osdc/Filer.h @@ -127,6 +127,45 @@ class Filer { return 0; } + int truncate(inodeno_t ino, + ceph_file_layout 
*layout, + const SnapContext& snapc, + __u64 offset, + size_t len, + __u32 truncate_seq, + int flags, + Context *onack, + Context *oncommit) { /* Maps (offset, len) of the file to object extents via file_to_extents() and sends one CEPH_OSD_OP_TRIMTRUNC per extent, carrying truncate_seq and the extent's offset as truncate_size. A single extent is sent directly with onack/oncommit; otherwise each non-null callback is wrapped in a C_Gather and every op gets a new_sub(). Always returns 0. */ + bufferlist bl; + vector extents; + file_to_extents(ino, layout, CEPH_NOSNAP, offset, len, extents); + if (extents.size() == 1) { + vector ops(1); + memset(&ops[0], 0, sizeof(ops[0])); + ops[0].op = CEPH_OSD_OP_TRIMTRUNC; + ops[0].truncate_seq = truncate_seq; + ops[0].truncate_size = extents[0].offset; + objecter->modify(extents[0].oid, extents[0].layout, ops, snapc, bl, flags, onack, oncommit); + } else { + C_Gather *gack = 0, *gcom = 0; + if (onack) + gack = new C_Gather(onack); + if (oncommit) + gcom = new C_Gather(oncommit); + for (vector::iterator p = extents.begin(); p != extents.end(); p++) { + vector ops(1); + memset(&ops[0], 0, sizeof(ops[0])); + ops[0].op = CEPH_OSD_OP_TRIMTRUNC; + ops[0].truncate_size = p->offset; + ops[0].truncate_seq = truncate_seq; + objecter->modify(p->oid, p->layout, ops, snapc, bl, flags, /* target this extent's own object (p->oid), matching p->offset and p->layout, rather than always extents[0] */ + gack ? gack->new_sub():0, + gcom ? gcom->new_sub():0); + } + } + return 0; + } + int zero(inodeno_t ino, ceph_file_layout *layout, const SnapContext& snapc,