compact_map<int32_t, int32_t> mds_caps_wanted; // [auth] mds -> caps wanted
int replica_caps_wanted; // [replica] what i've requested from auth
- compact_map<int, std::set<client_t> > client_snap_caps; // [auth] [snap] dirty metadata we still need from the head
public:
+ compact_map<int, std::set<client_t> > client_snap_caps; // [auth] [snap] dirty metadata we still need from the head
compact_map<snapid_t, std::set<client_t> > client_need_snapflush;
void add_need_snapflush(CInode *snapin, snapid_t snapid, client_t client);
} else
++p;
}
- if (gather)
+ if (gather) {
+ if (in->client_snap_caps.empty())
+ in->item_open_file.remove_myself();
eval_cap_gather(in, &need_issue);
+ }
} else {
if (cap && (cap->wanted() & ~cap->pending()) &&
need_issue.count(in) == 0) { // if we won't issue below anyway
CDentry *olddn = dn->dir->add_primary_dentry(dn->name, oldin, oldfirst, follows);
oldin->inode.version = olddn->pre_dirty();
dout(10) << " olddn " << *olddn << dendl;
- metablob->add_primary_dentry(olddn, 0, true);
+ bool need_snapflush = !oldin->client_snap_caps.empty();
+ if (need_snapflush)
+ mut->ls->open_files.push_back(&oldin->item_open_file);
+ metablob->add_primary_dentry(olddn, 0, true, false, false, need_snapflush);
mut->add_cow_dentry(olddn);
} else {
assert(dnl->is_remote());
p != mds->mdlog->segments.end();
++p) {
LogSegment *ls = p->second;
-
+
elist<CInode*>::iterator q = ls->open_files.begin(member_offset(CInode, item_open_file));
while (!q.end()) {
CInode *in = *q;
++q;
- if (!in->is_any_caps_wanted()) {
- dout(10) << " unlisting unwanted/capless inode " << *in << dendl;
- in->item_open_file.remove_myself();
+ if (in->last == CEPH_NOSNAP) {
+ if (!in->is_any_caps_wanted()) {
+ dout(10) << " unlisting unwanted/capless inode " << *in << dendl;
+ in->item_open_file.remove_myself();
+ }
+ } else if (in->last != CEPH_NOSNAP) {
+ if (in->client_snap_caps.empty()) {
+ dout(10) << " unlisting flushed snap inode " << *in << dendl;
+ in->item_open_file.remove_myself();
+ }
}
}
}
static const int STATE_DIRTY = (1<<0);
static const int STATE_DIRTYPARENT = (1<<1);
static const int STATE_DIRTYPOOL = (1<<2);
+ static const int STATE_NEED_SNAPFLUSH = (1<<3);
typedef compact_map<snapid_t, old_inode_t> old_inodes_t;
string dn; // dentry
snapid_t dnfirst, dnlast;
// State-flag accessors: each one tests a single STATE_* bit of `state` and
// yields true when that bit is set.
bool is_dirty() const { return (state & STATE_DIRTY); }
bool is_dirty_parent() const { return (state & STATE_DIRTYPARENT); }
bool is_dirty_pool() const { return (state & STATE_DIRTYPOOL); }
// Added by this change: tests STATE_NEED_SNAPFLUSH (1<<3).
+ bool need_snapflush() const { return (state & STATE_NEED_SNAPFLUSH); }
void print(ostream& out) const {
out << " fullbit dn " << dn << " [" << dnfirst << "," << dnlast << "] dnv " << dnv
// Journal a primary dentry for dn/in.
// Packs the boolean flags into a fullbit::STATE_* bitmask and forwards to the
// dirlump overload, using the lump obtained from add_dir(dn->get_dir(), false).
// Declared void: nothing is handed back to the caller.
//   dirty          -> fullbit::STATE_DIRTY
//   dirty_parent   -> fullbit::STATE_DIRTYPARENT
//   dirty_pool     -> fullbit::STATE_DIRTYPOOL
//   need_snapflush -> fullbit::STATE_NEED_SNAPFLUSH (parameter added by this
//                     change; defaults to false so existing callers are unaffected)
void add_primary_dentry(CDentry *dn, CInode *in, bool dirty,
- bool dirty_parent=false, bool dirty_pool=false) {
+ bool dirty_parent=false, bool dirty_pool=false,
+ bool need_snapflush=false) {
__u8 state = 0;  // start with no flags set
if (dirty) state |= fullbit::STATE_DIRTY;
if (dirty_parent) state |= fullbit::STATE_DIRTYPARENT;
if (dirty_pool) state |= fullbit::STATE_DIRTYPOOL;
+ if (need_snapflush) state |= fullbit::STATE_NEED_SNAPFLUSH;
add_primary_dentry(add_dir(dn->get_dir(), false), dn, in, state);
}
void add_primary_dentry(dirlump& lump, CDentry *dn, CInode *in, __u8 state) {
public:
EMetaBlob metablob;
vector<inodeno_t> inos;
+ vector<vinodeno_t> snap_inos;
EOpen() : LogEvent(EVENT_OPEN) { }
explicit EOpen(MDLog *mdlog) :
if (!in->is_base()) {
metablob.add_dir_context(in->get_projected_parent_dn()->get_dir());
metablob.add_primary_dentry(in->get_projected_parent_dn(), 0, false);
- inos.push_back(in->ino());
+ if (in->last == CEPH_NOSNAP)
+ inos.push_back(in->ino());
+ else
+ snap_inos.push_back(in->vino());
}
}
void add_ino(inodeno_t ino) {
assert(g_conf->mds_kill_journal_expire_at != 2);
- // open files
+ // open files and snap inodes
if (!open_files.empty()) {
assert(!mds->mdlog->is_capped()); // hmm FIXME
EOpen *le = 0;
elist<CInode*>::iterator p = open_files.begin(member_offset(CInode, item_open_file));
while (!p.end()) {
CInode *in = *p;
- assert(in->last == CEPH_NOSNAP);
++p;
- if (in->is_auth() && !in->is_ambiguous_auth() && in->is_any_caps()) {
+ if (in->last == CEPH_NOSNAP && in->is_auth() &&
+ !in->is_ambiguous_auth() && in->is_any_caps()) {
if (in->is_any_caps_wanted()) {
dout(20) << "try_to_expire requeueing open file " << *in << dendl;
if (!le) {
dout(20) << "try_to_expire not requeueing and delisting unwanted file " << *in << dendl;
in->item_open_file.remove_myself();
}
+ } else if (in->last != CEPH_NOSNAP && !in->client_snap_caps.empty()) {
+ // journal snap inodes that need flush. This simplifies the mds failover handling
+ dout(20) << "try_to_expire requeueing snap needflush inode " << *in << dendl;
+ if (!le) {
+ le = new EOpen(mds->mdlog);
+ mds->mdlog->start_entry(le);
+ }
+ le->add_clean_inode(in);
+ ls->open_files.push_back(&in->item_open_file);
} else {
/*
* we can get a capless inode here if we replay an open file, the client fails to
in->_mark_dirty(logseg);
if (p->is_dirty_parent())
in->_mark_dirty_parent(logseg, p->is_dirty_pool());
+ if (p->need_snapflush())
+ logseg->open_files.push_back(&in->item_open_file);
if (dn->is_auth())
in->state_set(CInode::STATE_AUTH);
else
// EOpen
// Serialize this event into bl in field order: stamp, metablob (encoded with
// the given feature bits), inos, and then snap_inos.
// The first ENCODE_START argument is bumped from 3 to 4 because snap_inos is
// appended; the second argument (presumably the compat version - confirm
// against the encoding macros) stays at 3. The decode side reads snap_inos
// only when struct_v >= 4.
void EOpen::encode(bufferlist &bl, uint64_t features) const {
- ENCODE_START(3, 3, bl);
+ ENCODE_START(4, 3, bl);
::encode(stamp, bl);
::encode(metablob, bl, features);
::encode(inos, bl);
+ ::encode(snap_inos, bl);  // new trailing field; must stay last for older decoders
ENCODE_FINISH(bl);
}
::decode(stamp, bl);
::decode(metablob, bl);
::decode(inos, bl);
+ if (struct_v >= 4)
+ ::decode(snap_inos, bl);
DECODE_FINISH(bl);
}
metablob.replay(mds, _segment);
// note which segments inodes belong to, so we don't have to start rejournaling them
- for (vector<inodeno_t>::iterator p = inos.begin();
- p != inos.end();
- ++p) {
- CInode *in = mds->mdcache->get_inode(*p);
+ for (const auto &ino : inos) {
+ CInode *in = mds->mdcache->get_inode(ino);
+ if (!in) {
+ dout(0) << "EOpen.replay ino " << ino << " not in metablob" << dendl;
+ assert(in);
+ }
+ _segment->open_files.push_back(&in->item_open_file);
+ }
+ for (const auto &vino : snap_inos) {
+ CInode *in = mds->mdcache->get_inode(vino);
if (!in) {
- dout(0) << "EOpen.replay ino " << *p << " not in metablob" << dendl;
+ dout(0) << "EOpen.replay ino " << vino << " not in metablob" << dendl;
assert(in);
}
_segment->open_files.push_back(&in->item_open_file);