Treat snap inodes that need flush in the same way as open files.
When the MDS recovers, this makes sure that journal replay brings snap
inodes that need flush into the cache.
Signed-off-by: Yan, Zheng <zyan@redhat.com>
compact_map<int32_t, int32_t> mds_caps_wanted; // [auth] mds -> caps wanted
int replica_caps_wanted; // [replica] what i've requested from auth
- compact_map<int, std::set<client_t> > client_snap_caps; // [auth] [snap] dirty metadata we still need from the head
public:
+ compact_map<int, std::set<client_t> > client_snap_caps; // [auth] [snap] dirty metadata we still need from the head
compact_map<snapid_t, std::set<client_t> > client_need_snapflush;
void add_need_snapflush(CInode *snapin, snapid_t snapid, client_t client);
} else
++p;
}
- if (gather)
+ if (gather) {
+ if (in->client_snap_caps.empty())
+ in->item_open_file.remove_myself();
eval_cap_gather(in, &need_issue);
+ }
} else {
if (cap && (cap->wanted() & ~cap->pending()) &&
need_issue.count(in) == 0) { // if we won't issue below anyway
CDentry *olddn = dn->dir->add_primary_dentry(dn->name, oldin, oldfirst, follows);
oldin->inode.version = olddn->pre_dirty();
dout(10) << " olddn " << *olddn << dendl;
- metablob->add_primary_dentry(olddn, 0, true);
+ bool need_snapflush = !oldin->client_snap_caps.empty();
+ if (need_snapflush)
+ mut->ls->open_files.push_back(&oldin->item_open_file);
+ metablob->add_primary_dentry(olddn, 0, true, false, false, need_snapflush);
mut->add_cow_dentry(olddn);
} else {
assert(dnl->is_remote());
p != mds->mdlog->segments.end();
++p) {
LogSegment *ls = p->second;
-
+
elist<CInode*>::iterator q = ls->open_files.begin(member_offset(CInode, item_open_file));
while (!q.end()) {
CInode *in = *q;
++q;
- if (!in->is_any_caps_wanted()) {
- dout(10) << " unlisting unwanted/capless inode " << *in << dendl;
- in->item_open_file.remove_myself();
+ if (in->last == CEPH_NOSNAP) {
+ if (!in->is_any_caps_wanted()) {
+ dout(10) << " unlisting unwanted/capless inode " << *in << dendl;
+ in->item_open_file.remove_myself();
+ }
+ } else if (in->last != CEPH_NOSNAP) {
+ if (in->client_snap_caps.empty()) {
+ dout(10) << " unlisting flushed snap inode " << *in << dendl;
+ in->item_open_file.remove_myself();
+ }
}
}
}
static const int STATE_DIRTY = (1<<0);
static const int STATE_DIRTYPARENT = (1<<1);
static const int STATE_DIRTYPOOL = (1<<2);
+ static const int STATE_NEED_SNAPFLUSH = (1<<3);
typedef compact_map<snapid_t, old_inode_t> old_inodes_t;
string dn; // dentry
snapid_t dnfirst, dnlast;
bool is_dirty() const { return (state & STATE_DIRTY); }
bool is_dirty_parent() const { return (state & STATE_DIRTYPARENT); }
bool is_dirty_pool() const { return (state & STATE_DIRTYPOOL); }
+ bool need_snapflush() const { return (state & STATE_NEED_SNAPFLUSH); }
void print(ostream& out) const {
out << " fullbit dn " << dn << " [" << dnfirst << "," << dnlast << "] dnv " << dnv
// return remote pointer to to-be-journaled inode
void add_primary_dentry(CDentry *dn, CInode *in, bool dirty,
- bool dirty_parent=false, bool dirty_pool=false) {
+ bool dirty_parent=false, bool dirty_pool=false,
+ bool need_snapflush=false) {
__u8 state = 0;
if (dirty) state |= fullbit::STATE_DIRTY;
if (dirty_parent) state |= fullbit::STATE_DIRTYPARENT;
if (dirty_pool) state |= fullbit::STATE_DIRTYPOOL;
+ if (need_snapflush) state |= fullbit::STATE_NEED_SNAPFLUSH;
add_primary_dentry(add_dir(dn->get_dir(), false), dn, in, state);
}
void add_primary_dentry(dirlump& lump, CDentry *dn, CInode *in, __u8 state) {
public:
EMetaBlob metablob;
vector<inodeno_t> inos;
+ vector<vinodeno_t> snap_inos;
EOpen() : LogEvent(EVENT_OPEN) { }
explicit EOpen(MDLog *mdlog) :
if (!in->is_base()) {
metablob.add_dir_context(in->get_projected_parent_dn()->get_dir());
metablob.add_primary_dentry(in->get_projected_parent_dn(), 0, false);
- inos.push_back(in->ino());
+ if (in->last == CEPH_NOSNAP)
+ inos.push_back(in->ino());
+ else
+ snap_inos.push_back(in->vino());
}
}
void add_ino(inodeno_t ino) {
assert(g_conf->mds_kill_journal_expire_at != 2);
- // open files
+ // open files and snap inodes
if (!open_files.empty()) {
assert(!mds->mdlog->is_capped()); // hmm FIXME
EOpen *le = 0;
elist<CInode*>::iterator p = open_files.begin(member_offset(CInode, item_open_file));
while (!p.end()) {
CInode *in = *p;
- assert(in->last == CEPH_NOSNAP);
++p;
- if (in->is_auth() && !in->is_ambiguous_auth() && in->is_any_caps()) {
+ if (in->last == CEPH_NOSNAP && in->is_auth() &&
+ !in->is_ambiguous_auth() && in->is_any_caps()) {
if (in->is_any_caps_wanted()) {
dout(20) << "try_to_expire requeueing open file " << *in << dendl;
if (!le) {
dout(20) << "try_to_expire not requeueing and delisting unwanted file " << *in << dendl;
in->item_open_file.remove_myself();
}
+ } else if (in->last != CEPH_NOSNAP && !in->client_snap_caps.empty()) {
+ // journal snap inodes that need flush. This simplifies the mds failover handling
+ dout(20) << "try_to_expire requeueing snap needflush inode " << *in << dendl;
+ if (!le) {
+ le = new EOpen(mds->mdlog);
+ mds->mdlog->start_entry(le);
+ }
+ le->add_clean_inode(in);
+ ls->open_files.push_back(&in->item_open_file);
} else {
/*
* we can get a capless inode here if we replay an open file, the client fails to
in->_mark_dirty(logseg);
if (p->is_dirty_parent())
in->_mark_dirty_parent(logseg, p->is_dirty_pool());
+ if (p->need_snapflush())
+ logseg->open_files.push_back(&in->item_open_file);
if (dn->is_auth())
in->state_set(CInode::STATE_AUTH);
else
// EOpen
void EOpen::encode(bufferlist &bl, uint64_t features) const {
- ENCODE_START(3, 3, bl);
+ ENCODE_START(4, 3, bl);
::encode(stamp, bl);
::encode(metablob, bl, features);
::encode(inos, bl);
+ ::encode(snap_inos, bl);
ENCODE_FINISH(bl);
}
::decode(stamp, bl);
::decode(metablob, bl);
::decode(inos, bl);
+ if (struct_v >= 4)
+ ::decode(snap_inos, bl);
DECODE_FINISH(bl);
}
metablob.replay(mds, _segment);
// note which segments inodes belong to, so we don't have to start rejournaling them
- for (vector<inodeno_t>::iterator p = inos.begin();
- p != inos.end();
- ++p) {
- CInode *in = mds->mdcache->get_inode(*p);
+ for (const auto &ino : inos) {
+ CInode *in = mds->mdcache->get_inode(ino);
+ if (!in) {
+ dout(0) << "EOpen.replay ino " << ino << " not in metablob" << dendl;
+ assert(in);
+ }
+ _segment->open_files.push_back(&in->item_open_file);
+ }
+ for (const auto &vino : snap_inos) {
+ CInode *in = mds->mdcache->get_inode(vino);
if (!in) {
- dout(0) << "EOpen.replay ino " << *p << " not in metablob" << dendl;
+ dout(0) << "EOpen.replay ino " << vino << " not in metablob" << dendl;
assert(in);
}
_segment->open_files.push_back(&in->item_open_file);