We don't clean up after we finish our truncate.
static const __u64 WAIT_ANCHORED = (1<<1);
static const __u64 WAIT_UNANCHORED = (1<<2);
static const __u64 WAIT_FROZEN = (1<<3);
+ static const __u64 WAIT_TRUNC = (1<<4); // wait bit for requests parked behind a pending truncate
- static const int WAIT_AUTHLOCK_OFFSET = 4;
- static const int WAIT_LINKLOCK_OFFSET = 4 + SimpleLock::WAIT_BITS;
- static const int WAIT_DIRFRAGTREELOCK_OFFSET = 4 + 2*SimpleLock::WAIT_BITS;
- static const int WAIT_FILELOCK_OFFSET = 4 + 3*SimpleLock::WAIT_BITS;
- static const int WAIT_VERSIONLOCK_OFFSET = 4 + 4*SimpleLock::WAIT_BITS;
- static const int WAIT_XATTRLOCK_OFFSET = 4 + 5*SimpleLock::WAIT_BITS;
- static const int WAIT_SNAPLOCK_OFFSET = 4 + 6*SimpleLock::WAIT_BITS;
- static const int WAIT_NESTLOCK_OFFSET = 4 + 7*SimpleLock::WAIT_BITS;
+ static const int WAIT_AUTHLOCK_OFFSET = 5; // base moved from 4 to 5: bit 4 is now taken by WAIT_TRUNC
+ static const int WAIT_LINKLOCK_OFFSET = 5 + SimpleLock::WAIT_BITS;
+ static const int WAIT_DIRFRAGTREELOCK_OFFSET = 5 + 2*SimpleLock::WAIT_BITS;
+ static const int WAIT_FILELOCK_OFFSET = 5 + 3*SimpleLock::WAIT_BITS;
+ static const int WAIT_VERSIONLOCK_OFFSET = 5 + 4*SimpleLock::WAIT_BITS;
+ static const int WAIT_XATTRLOCK_OFFSET = 5 + 5*SimpleLock::WAIT_BITS;
+ static const int WAIT_SNAPLOCK_OFFSET = 5 + 6*SimpleLock::WAIT_BITS;
+ static const int WAIT_NESTLOCK_OFFSET = 5 + 7*SimpleLock::WAIT_BITS;
static const __u64 WAIT_ANY_MASK = (__u64)(-1);
+/*
+ * truncate_inode
+ *  Ask the filer to truncate the file data for the truncation recorded in
+ *  the inode's projected inode_t (truncate_from -> truncate_size, tagged
+ *  with truncate_seq).  Both completion contexts are passed as 0, so
+ *  nothing here is notified when the operation finishes.
+ *  'ls' is accepted but not used.
+ */
+void MDCache::truncate_inode(CInode *in, LogSegment *ls)
+{
+  inode_t *pi = in->get_projected_inode();
+  dout(10) << "truncate_inode " << pi->truncate_from << " -> " << pi->truncate_size
+           << " on " << *in << dendl;
+
+  // snap context: the containing realm's if there is one, otherwise a
+  // default-constructed SnapContext
+  SnapContext nullsnap;
+  const SnapContext *snapc = &nullsnap;
+  SnapRealm *realm = in->find_snaprealm();
+  if (!realm) {
+    dout(10) << " NO realm, using null context" << dendl;
+    assert(in->last == CEPH_NOSNAP);
+  } else {
+    dout(10) << " realm " << *realm << dendl;
+    snapc = &realm->get_snap_context();
+  }
+
+  // NOTE(review): this streams the SnapContext pointer, not its contents
+  dout(10) << "truncate_inode snapc " << snapc << " on " << *in << dendl;
+
+  // range handed to the filer: starts at the new size, spans up to the old size
+  const uint64_t range_start = pi->truncate_size;
+  const uint64_t range_len = pi->truncate_from - pi->truncate_size;
+  mds->filer->truncate(in->inode.ino, &in->inode.layout, *snapc,
+                       range_start, range_len, pi->truncate_seq, 0,
+                       0, 0);  // no onack / oncommit
+}
// **************
// Inode purging -- reliably removing deleted file's objects
};
/* purge_inode in
- * will be called by on unlink or rmdir or truncate or purge
+ * will be called on unlink or rmdir or purge
* caller responsible for journaling a matching EUpdate
*/
void MDCache::purge_inode(CInode *in, loff_t newsize, loff_t oldsize, LogSegment *ls)
void rename_file(CDentry *srcdn, CDentry *destdn);
public:
+ // truncate: hand an inode's projected truncate (truncate_from -> truncate_size) to the filer
+ void truncate_inode(CInode *in, LogSegment *ls);
+
// inode purging
void purge_inode(CInode *in, loff_t newsize, loff_t oldsize, LogSegment *ls);
void _do_purge_inode(CInode *in, loff_t newsize, loff_t oldsize);
in->inode.version = 1;
in->inode.nlink = 1; // FIXME
in->inode.layout = g_default_file_layout;
-
+
+ in->inode.truncate_size = -1ull; // not truncated, yet!
+
in->inode.uid = mdr->client_request->get_caller_uid();
in->inode.gid = mdr->client_request->get_caller_gid();
in->inode.ctime = in->inode.mtime = in->inode.atime = mdr->now; // now
// ===================================
// TRUNCATE, FSYNC
-class C_MDS_truncate_purged : public Context {
+struct DelayTrunc : public Context { // context that, when finished, calls MDCache::truncate_inode(in, ls)
MDS *mds;
- MDRequest *mdr;
-public:
- C_MDS_truncate_purged(MDS *m, MDRequest *r) :
- mds(m), mdr(r) {}
+ CInode *in; // raw pointer; nothing here pins it -- NOTE(review): confirm it outlives the delay
+ LogSegment *ls; // raw pointer, forwarded as-is to truncate_inode
+ DelayTrunc(MDS *m, CInode *i, LogSegment *l) : mds(m), in(i), ls(l) {}
void finish(int r) {
- assert(r == 0);
- mds->server->reply_request(mdr, 0);
+ mds->mdcache->truncate_inode(in, ls); // r is ignored
}
};
void finish(int r) {
assert(r == 0);
- // apply to cache
- __u64 old_size = in->inode.size;
+ // apply the projected inode
in->pop_and_dirty_projected_inode(mdr->ls);
-
mdr->apply();
// notify any clients
mds->locker->issue_truncate(in);
+ //mds->mdcache->truncate_inode(in, mdr->ls);
+ mds->timer.add_event_after(10.0, new DelayTrunc(mds, in, mdr->ls)); // truncate_inode runs later via the timer (10.0 -- presumably seconds, confirm), not inline
- if (old_size <= in->inode.size) {
- // forward truncate. done!
- mds->server->reply_request(mdr, 0);
- } else {
- // purge
- mds->mdcache->purge_inode(in, in->inode.size, old_size, mdr->ls);
- mds->mdcache->wait_for_purge(in, in->inode.size,
- new C_MDS_truncate_purged(mds, mdr));
- }
+ mds->balancer->hit_inode(mdr->now, in, META_POP_IWR);
+
+ mds->server->reply_request(mdr, 0); // reply now; does not wait for the deferred truncate_inode
}
};
set<SimpleLock*> rdlocks = mdr->rdlocks;
set<SimpleLock*> wrlocks = mdr->wrlocks;
set<SimpleLock*> xlocks = mdr->xlocks;
- xlocks.insert(&cur->filelock);
+ wrlocks.insert(&cur->filelock);
mds->locker->include_snap_rdlocks(rdlocks, cur);
if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
return;
// already the correct size?
- if (cur->inode.size == req->head.args.truncate.length) {
+ inode_t *pi = cur->get_projected_inode();
+ __u64 old_size = MAX(pi->size, req->head.args.truncate.old_length);
+ if (old_size == req->head.args.truncate.length) {
reply_request(mdr, 0);
return;
}
+ if (old_size > req->head.args.truncate.length && pi->is_truncating()) {
+ dout(10) << " waiting for pending truncate from " << pi->truncate_from
+ << " to " << pi->truncate_size << " to complete on " << *cur << dendl;
+ cur->add_waiter(CInode::WAIT_TRUNC, new C_MDS_RetryRequest(mdcache, mdr));
+ return;
+ }
+
// prepare
version_t pdv = cur->pre_dirty();
utime_t ctime = g_clock.real_now();
- Context *fin = new C_MDS_truncate_logged(mds, mdr, cur);
-
- // log + wait
+
mdr->ls = mdlog->get_current_segment();
EUpdate *le = new EUpdate(mdlog, "truncate");
le->metablob.add_client_req(mdr->reqid);
le->metablob.add_inode_truncate(cur->ino(), req->head.args.truncate.length, cur->inode.size);
- inode_t *pi = cur->project_inode();
+ pi = cur->project_inode();
pi->mtime = ctime;
pi->ctime = ctime;
pi->version = pdv;
- pi->size = req->head.args.truncate.length;
- pi->rstat.rbytes = pi->size;
- pi->truncate_seq++;
+ if (old_size > req->head.args.truncate.length) {
+ // truncate to smaller size
+ pi->truncate_from = old_size;
+ pi->size = req->head.args.truncate.length;
+ pi->rstat.rbytes = pi->size;
+ pi->truncate_size = pi->size;
+ pi->truncate_seq++;
+ } else {
+ // truncate to larger size
+ pi->size = req->head.args.truncate.length;
+ pi->rstat.rbytes = pi->size;
+ }
mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY, false);
mdcache->journal_dirty_inode(mdr, &le->metablob, cur);
- mdlog->submit_entry(le, fin);
+ journal_and_reply(mdr, cur, 0, le, new C_MDS_truncate_logged(mds, mdr, cur));
}
uint64_t size; // on directory, # dentries
uint64_t max_size; // client(s) are auth to write this much...
uint32_t truncate_seq;
- uint64_t truncate_size;
+ uint64_t truncate_size, truncate_from;
utime_t mtime; // file data modify time.
utime_t atime; // file data access time.
uint32_t time_warp_seq; // count of (potential) mtime/atime timewarps (i.e., utimes())
bool is_dir() const { return (mode & S_IFMT) == S_IFDIR; }
bool is_file() const { return (mode & S_IFMT) == S_IFREG; }
+ bool is_truncating() const { return truncate_size != -1ull; } // -1ull is the "not truncated" marker value
+
void encode(bufferlist &bl) const {
::encode(ino, bl);
::encode(rdev, bl);
::encode(max_size, bl);
::encode(truncate_seq, bl);
::encode(truncate_size, bl);
+ ::encode(truncate_from, bl);
::encode(mtime, bl);
::encode(atime, bl);
::encode(time_warp_seq, bl);
::decode(max_size, p);
::decode(truncate_seq, p);
::decode(truncate_size, p);
+ ::decode(truncate_from, p);
::decode(mtime, p);
::decode(atime, p);
::decode(time_warp_seq, p);
return 0;
}
+ /**
+  * Send CEPH_OSD_OP_TRIMTRUNC ops for the objects backing a file range.
+  *
+  * The range [offset, offset+len) is mapped to object extents with
+  * file_to_extents(); one op is sent per extent, carrying truncate_seq
+  * and the extent's ObjectExtent::offset as truncate_size.
+  *
+  * @param ino          inode whose objects are targeted
+  * @param layout       file layout used for the extent mapping
+  * @param snapc        snap context passed through to objecter->modify()
+  * @param offset       start of the file range
+  * @param len          length of the file range
+  * @param truncate_seq truncate sequence number stamped on every op
+  * @param flags        passed through to objecter->modify()
+  * @param onack        optional; with multiple extents it is wrapped in a
+  *                     C_Gather with one sub-context per op
+  * @param oncommit     optional; handled the same way as onack
+  * @return always 0
+  */
+ int truncate(inodeno_t ino,
+              ceph_file_layout *layout,
+              const SnapContext& snapc,
+              __u64 offset,
+              size_t len,
+              __u32 truncate_seq,
+              int flags,
+              Context *onack,
+              Context *oncommit) {
+   bufferlist bl;  // never filled: the ops are sent with an empty data payload
+   vector<ObjectExtent> extents;
+   file_to_extents(ino, layout, CEPH_NOSNAP, offset, len, extents);
+   if (extents.size() == 1) {
+     // single extent: pass the caller's contexts straight to the objecter
+     vector<ceph_osd_op> ops(1);
+     memset(&ops[0], 0, sizeof(ops[0]));
+     ops[0].op = CEPH_OSD_OP_TRIMTRUNC;
+     ops[0].truncate_seq = truncate_seq;
+     ops[0].truncate_size = extents[0].offset;
+     objecter->modify(extents[0].oid, extents[0].layout, ops, snapc, bl, flags, onack, oncommit);
+   } else {
+     // multiple extents: fan the caller's contexts out, one sub-context per op
+     // NOTE(review): with zero extents no op is sent and the gathers get no
+     // sub-contexts -- confirm an empty range cannot reach this path
+     C_Gather *gack = 0, *gcom = 0;
+     if (onack)
+       gack = new C_Gather(onack);
+     if (oncommit)
+       gcom = new C_Gather(oncommit);
+     for (vector<ObjectExtent>::iterator p = extents.begin(); p != extents.end(); ++p) {
+       vector<ceph_osd_op> ops(1);
+       memset(&ops[0], 0, sizeof(ops[0]));
+       ops[0].op = CEPH_OSD_OP_TRIMTRUNC;
+       ops[0].truncate_seq = truncate_seq;
+       ops[0].truncate_size = p->offset;
+       // address this extent's own object; using extents[0].oid here would
+       // send every op to the first object and leave the others untouched
+       objecter->modify(p->oid, p->layout, ops, snapc, bl, flags,
+                        gack ? gack->new_sub():0,
+                        gcom ? gcom->new_sub():0);
+     }
+   }
+   return 0;
+ }
+
int zero(inodeno_t ino,
ceph_file_layout *layout,
const SnapContext& snapc,