Log segments were allowed to trim even with outstanding purges; fix.
static const int PIN_IMPORTINGCAPS = 15;
static const int PIN_PASTSNAPPARENT = -16;
static const int PIN_OPENINGSNAPPARENTS = 17;
+ static const int PIN_TRUNCATING = 18; // taken in MDCache::_truncate_inode(), dropped in MDCache::truncate_inode_logged()
const char *pin_name(int p) { // map a pin id to a short printable name
switch (p) {
case PIN_IMPORTINGCAPS: return "importingcaps";
case PIN_PASTSNAPPARENT: return "pastsnapparent";
case PIN_OPENINGSNAPPARENTS: return "openingsnapparents";
+ case PIN_TRUNCATING: return "truncating"; // name for the newly added PIN_TRUNCATING
default: return generic_pin_name(p); // ids not listed here are delegated to generic_pin_name()
}
}
xlist<MDSlaveUpdate*> slave_updates;
//xlist<CInode*> purging_inodes;
+ set<CInode*> truncating_inodes; // inodes with a truncate registered on this segment and not yet finished; try_to_expire waits on each
map<CInode*, map<loff_t,loff_t> > purging_inodes; // per-inode pending purges; presumably newsize -> oldsize -- TODO confirm
map<int, hash_set<version_t> > pending_commit_tids; // mdstable
-
+// ----------------------------
+// truncate
void MDCache::truncate_inode(CInode *in, LogSegment *ls) // begin truncating 'in', tracking the operation against log segment 'ls'
{
inode_t *pi = in->get_projected_inode();
- dout(10) << "truncate_inode " << pi->truncate_from << " -> " << pi->truncate_size
+ dout(10) << "truncate_inode "
+ << pi->truncate_from << " -> " << pi->truncate_size
<< " on " << *in
<< dendl;
+ ls->truncating_inodes.insert(in); // registered before the truncate is issued; try_to_expire waits on every inode in this set
+
+ _truncate_inode(in, ls); // pins the inode and issues the object truncate
+}
+
+// Completion context for the object truncate issued by
+// MDCache::_truncate_inode(): when fired, it hands the inode and its log
+// segment to MDCache::truncate_inode_finish().
+struct C_MDC_TruncateFinish : public Context {
+ MDCache *mdc; // cache to call back into
+ CInode *in; // inode being truncated
+ LogSegment *ls; // segment the truncate is tracked against
+
+ C_MDC_TruncateFinish(MDCache *cache, CInode *inode, LogSegment *seg)
+ : mdc(cache),
+ in(inode),
+ ls(seg)
+ {}
+
+ // The result code r is not examined.
+ void finish(int r) {
+ mdc->truncate_inode_finish(in, ls);
+ }
+};
+
+void MDCache::_truncate_inode(CInode *in, LogSegment *ls) // pin 'in' and issue the object truncate; called by truncate_inode() and start_recovered_truncates()
+{
+ inode_t *pi = in->get_projected_inode();
+ dout(10) << "_truncate_inode "
+ << pi->truncate_from << " -> " << pi->truncate_size
+ << " on " << *in << dendl;
+
+ in->get(CInode::PIN_TRUNCATING); // released by put() in truncate_inode_logged()
+ in->auth_pin(this); // matched by auth_unpin(this) in truncate_inode_logged()
+
SnapRealm *realm = in->find_snaprealm();
SnapContext nullsnap;
const SnapContext *snapc; // NOTE(review): no assignment to snapc is visible in this excerpt before it is dereferenced below; presumably it is set in lines not shown here -- confirm
dout(10) << "truncate_inode snapc " << snapc << " on " << *in << dendl;
mds->filer->truncate(in->inode.ino, &in->inode.layout, *snapc,
pi->truncate_size, pi->truncate_from-pi->truncate_size, pi->truncate_seq, 0,
- 0, 0);//new C_MDC_PurgeFinish(this, in, newsize, oldsize));
+ 0, new C_MDC_TruncateFinish(this, in, ls)); // the completion calls truncate_inode_finish(in, ls)
+}
+
+// Completion context passed to submit_entry() for the "truncate finish"
+// journal entry: when fired, it calls MDCache::truncate_inode_logged()
+// with the inode and the mutation created in truncate_inode_finish().
+struct C_MDC_TruncateLogged : public Context {
+ MDCache *mdc; // cache to call back into
+ CInode *in; // inode whose truncate was journaled
+ Mutation *mut; // mutation to apply; deleted by truncate_inode_logged()
+
+ C_MDC_TruncateLogged(MDCache *cache, CInode *inode, Mutation *mutation)
+ : mdc(cache),
+ in(inode),
+ mut(mutation)
+ {}
+
+ // The result code r is not examined.
+ void finish(int r) {
+ mdc->truncate_inode_logged(in, mut);
+ }
+};
+
+void MDCache::truncate_inode_finish(CInode *in, LogSegment *ls) // called from C_MDC_TruncateFinish: journal that the truncate of 'in' is done
+{
+ dout(10) << "truncate_inode_finish " << *in << dendl;
+
+ ls->truncating_inodes.erase(in); // 'ls' no longer tracks this truncate
+
+ // update
+ inode_t *pi = in->project_inode();
+ pi->version = in->pre_dirty();
+ pi->truncate_from = 0; // reset the truncate range fields on the projected inode
+ pi->truncate_size = (__u64)-1ull;
+
+ Mutation *mut = new Mutation; // deleted in truncate_inode_logged()
+ mut->ls = mds->mdlog->get_current_segment();
+ mut->add_projected_inode(in);
+
+ EUpdate *le = new EUpdate(mds->mdlog, "truncate finish");
+ le->metablob.add_dir_context(in->get_parent_dir());
+ le->metablob.add_primary_dentry(in->get_projected_parent_dn(), true, in, pi);
+ le->metablob.add_truncate_finish(in->ino(), ls->offset); // ls->offset is the key replay passes to MDLog::get_segment()
+
+ journal_dirty_inode(mut, &le->metablob, in);
+ mds->mdlog->submit_entry(le, new C_MDC_TruncateLogged(this, in, mut)); // the completion calls truncate_inode_logged(in, mut)
+}
+
+void MDCache::truncate_inode_logged(CInode *in, Mutation *mut) // called from C_MDC_TruncateLogged: apply the mutation, drop pins, wake WAIT_TRUNC waiters
+{
+ dout(10) << "truncate_inode_logged " << *in << dendl;
+ mut->apply();
+ delete mut; // allocated in truncate_inode_finish()
+ in->put(CInode::PIN_TRUNCATING); // undoes the get() from _truncate_inode()
+ in->auth_unpin(this); // undoes the auth_pin() from _truncate_inode()
+ list<Context*> waiters;
+ in->take_waiting(CInode::WAIT_TRUNC, waiters); // try_to_expire registers its gather subs under WAIT_TRUNC
+ mds->queue_waiters(waiters);
}
-// **************
-// Inode purging -- reliably removing deleted file's objects
+
+void MDCache::add_recovered_truncate(CInode *in, LogSegment *ls) // journal replay: note on 'ls' that a truncate of 'in' was started
+{
+ ls->truncating_inodes.insert(in); // no pin or I/O here; start_recovered_truncates() issues the truncate later
+}
+
+// Walk every log segment and re-issue, via _truncate_inode(), each truncate
+// still listed in that segment's truncating_inodes set.
+void MDCache::start_recovered_truncates()
+{
+ dout(10) << "start_recovered_truncates" << dendl;
+
+ typedef map<loff_t,LogSegment*>::iterator seg_iter;
+ typedef set<CInode*>::iterator inode_iter;
+
+ for (seg_iter seg = mds->mdlog->segments.begin();
+ seg != mds->mdlog->segments.end();
+ ++seg) {
+ LogSegment *ls = seg->second;
+ for (inode_iter it = ls->truncating_inodes.begin();
+ it != ls->truncating_inodes.end();
+ ++it) {
+ _truncate_inode(*it, ls);
+ }
+ }
+}
+
+
+
+// ----------------------------
+// purge
class C_MDC_PurgeFinish : public Context {
MDCache *mdc;
public:
// truncate
void truncate_inode(CInode *in, LogSegment *ls);
+ void _truncate_inode(CInode *in, LogSegment *ls); // pin the inode and issue the object truncate; also used on recovery
+ void truncate_inode_finish(CInode *in, LogSegment *ls); // object truncate completed; journals "truncate finish"
+ void truncate_inode_logged(CInode *in, Mutation *mut); // finish event journaled; unpin and wake WAIT_TRUNC waiters
+
+ void add_recovered_truncate(CInode *in, LogSegment *ls); // replay: note an unfinished truncate on ls
+ void remove_recovered_truncate(CInode *in); // NOTE(review): no definition or caller appears in this excerpt -- confirm it exists
+ void start_recovered_truncates(); // re-issue the truncates noted during replay
// inode purging
void purge_inode(CInode *in, loff_t newsize, loff_t oldsize, LogSegment *ls);
return segments.empty() ? 0:segments.rbegin()->second;
}
+ // Return the log segment stored in 'segments' under key 'off', or NULL
+ // when there is no such entry. A single find() replaces the previous
+ // count() + operator[] pair, which searched the map twice.
+ LogSegment *get_segment(__u64 off) {
+ map<loff_t,LogSegment*>::iterator p = segments.find(off);
+ if (p == segments.end())
+ return NULL;
+ return p->second;
+ }
+
void flush_logger();
anchorclient->finish_recovery();
snapclient->finish_recovery();
+ mdcache->start_recovered_truncates();
mdcache->start_recovered_purges();
mdcache->do_file_recover();
// ===================================
// TRUNCATE, FSYNC
-struct DelayTrunc : public Context {
- MDS *mds;
- CInode *in;
- LogSegment *ls;
- DelayTrunc(MDS *m, CInode *i, LogSegment *l) : mds(m), in(i), ls(l) {}
- void finish(int r) {
- mds->mdcache->truncate_inode(in, ls);
- }
-};
-
class C_MDS_truncate_logged : public Context {
MDS *mds;
MDRequest *mdr;
// notify any clients
mds->locker->issue_truncate(in);
- //mds->mdcache->truncate_inode(in, mdr->ls);
- mds->timer.add_event_after(10.0, new DelayTrunc(mds, in, mdr->ls));
+ mds->mdcache->truncate_inode(in, mdr->ls);
mds->balancer->hit_inode(mdr->now, in, META_POP_IWR);
mdr->ls = mdlog->get_current_segment();
EUpdate *le = new EUpdate(mdlog, "truncate");
le->metablob.add_client_req(mdr->reqid);
- le->metablob.add_inode_truncate(cur->ino(), req->head.args.truncate.length, cur->inode.size);
pi = cur->project_inode();
pi->mtime = ctime;
pi->ctime = ctime;
pi->rstat.rbytes = pi->size;
pi->truncate_size = pi->size;
pi->truncate_seq++;
+ le->metablob.add_truncate_start(cur->ino());
} else {
// truncate to larger size
pi->size = req->head.args.truncate.length;
mdr->ls = mdlog->get_current_segment();
EUpdate *le = new EUpdate(mdlog, "open_truncate");
le->metablob.add_client_req(mdr->reqid);
- le->metablob.add_inode_truncate(cur->ino(), 0, cur->inode.size);
+ le->metablob.add_inode_purge(cur->ino(), 0, cur->inode.size);
inode_t *pi = cur->project_inode();
pi->mtime = ctime;
pi->ctime = ctime;
entity_name_t client_name; // session
// inodes i've truncated
- list< triple<inodeno_t,uint64_t,uint64_t> > truncated_inodes;
+ list<inodeno_t> truncate_start; // inodes whose truncate starts with this event
+ map<inodeno_t,__u64> truncate_finish; // finished truncates: ino -> offset of the log segment the truncate was registered on
+
+ list< triple<inodeno_t,uint64_t,uint64_t> > purging_inodes; // (ino, newsize, oldsize) entries added by add_inode_purge()
vector<inodeno_t> destroyed_inodes;
// idempotent op(s)
::encode(client_name, bl);
::encode(inotablev, bl);
::encode(sessionmapv, bl);
- ::encode(truncated_inodes, bl);
+ ::encode(truncate_start, bl);
+ ::encode(truncate_finish, bl);
+ ::encode(purging_inodes, bl);
::encode(destroyed_inodes, bl);
::encode(client_reqs, bl);
}
::decode(client_name, bl);
::decode(inotablev, bl);
::decode(sessionmapv, bl);
- ::decode(truncated_inodes, bl);
+ ::decode(truncate_start, bl);
+ ::decode(truncate_finish, bl);
+ ::decode(purging_inodes, bl);
::decode(destroyed_inodes, bl);
::decode(client_reqs, bl);
}
inotablev = iv;
}
- void add_inode_truncate(inodeno_t ino, uint64_t newsize, uint64_t oldsize) {
- truncated_inodes.push_back(triple<inodeno_t,uint64_t,uint64_t>(ino, newsize, oldsize));
+ void add_truncate_start(inodeno_t ino) { // record that a truncate of 'ino' starts with this event
+ truncate_start.push_back(ino);
+ }
+ void add_truncate_finish(inodeno_t ino, __u64 segoff) { // record that the truncate of 'ino' tracked by the segment at offset 'segoff' has finished
+ truncate_finish[ino] = segoff; // a repeated call for the same ino overwrites the earlier segoff
+ }
+
+ void add_inode_purge(inodeno_t ino, uint64_t newsize, uint64_t oldsize) { // record a purge of 'ino' as an (ino, newsize, oldsize) triple
+ purging_inodes.push_back(triple<inodeno_t,uint64_t,uint64_t>(ino, newsize, oldsize));
}
void add_destroyed_inode(inodeno_t ino) { // append 'ino' to the destroyed_inodes list encoded with this blob
destroyed_inodes.push_back(ino);
}
}
+ // truncating
+ for (set<CInode*>::iterator p = truncating_inodes.begin();
+ p != truncating_inodes.end();
+ p++) {
+ dout(10) << "try_to_expire waiting for truncate of " << **p << dendl;
+ if (!gather) gather = new C_Gather;
+ (*p)->add_waiter(CInode::WAIT_TRUNC, gather->new_sub());
+ }
+
+ // purging
+ for (map<CInode*, map<loff_t,loff_t> >::iterator p = purging_inodes.begin();
+ p != purging_inodes.end();
+ ++p) {
+ CInode *in = p->first;
+ dout(10) << "try_to_expire waiting for purge of " << *in << dendl;
+ if (!gather) gather = new C_Gather;
+ mds->mdcache->wait_for_purge(in, p->second.begin()->first, gather->new_sub());
+ }
+
// FIXME client requests...?
// audit handling of anchor transactions?
}
}
- // truncated inodes
- for (list< triple<inodeno_t,uint64_t,uint64_t> >::iterator p = truncated_inodes.begin();
- p != truncated_inodes.end();
+ // truncating inodes
+ for (list<inodeno_t>::iterator p = truncate_start.begin();
+ p != truncate_start.end();
+ p++) {
+ CInode *in = mds->mdcache->get_inode(*p);
+ assert(in);
+ mds->mdcache->add_recovered_truncate(in, logseg);
+ }
+ for (map<inodeno_t,__u64>::iterator p = truncate_finish.begin();
+ p != truncate_finish.end();
+ p++) {
+ LogSegment *ls = mds->mdlog->get_segment(p->second);
+ if (ls) {
+ CInode *in = mds->mdcache->get_inode(p->first);
+ assert(in);
+ ls->truncating_inodes.erase(in);
+ }
+ }
+
+ // purging inodes
+ for (list< triple<inodeno_t,uint64_t,uint64_t> >::iterator p = purging_inodes.begin();
+ p != purging_inodes.end();
++p) {
CInode *in = mds->mdcache->get_inode(p->first);
assert(in);
- dout(10) << "EMetaBlob.replay will purge truncated "
+ dout(10) << "EMetaBlob.replay will purging "
<< p->third << " -> " << p->second
<< " on " << *in << dendl;
mds->mdcache->add_recovered_purge(in, p->second, p->third, logseg);