- ``mds session blacklist on timeout`` is now ``mds session blocklist on timeout``
- ``mds session blacklist on evict`` is now ``mds session blocklist on evict``
+* CephFS: Compatibility code for the old on-disk snapshot format has been removed.
+ The current on-disk snapshot format was introduced in the Mimic release. If there
+ are any snapshots created by a Ceph release older than Mimic, then before upgrading
+ either delete them all or scrub the whole filesystem:
+
+ ceph daemon <mds of rank 0> scrub_path / force recursive repair
+ ceph daemon <mds of rank 0> scrub_path '~mdsdir' force recursive repair
+
* The following librados API calls have changed:
- ``rados_blacklist_add`` is now ``rados_blocklist_add``; the former will issue a deprecation warning and be removed in a future release.
CEPH_MDS_OP_ENQUEUE_SCRUB = 0x01503,
CEPH_MDS_OP_REPAIR_FRAGSTATS = 0x01504,
CEPH_MDS_OP_REPAIR_INODESTATS = 0x01505,
- CEPH_MDS_OP_UPGRADE_SNAPREALM = 0x01506
};
extern const char *ceph_mds_op_name(int op);
list<CInode*> undef_inodes;
// purge stale snaps?
- // only if we have past_parents open!
bool force_dirty = false;
const set<snapid_t> *snaps = NULL;
SnapRealm *realm = inode->find_snaprealm();
- if (!realm->have_past_parents_open()) {
- dout(10) << " no snap purge, one or more past parents NOT open" << dendl;
- } else if (fnode->snap_purged_thru < realm->get_last_destroyed()) {
+ if (fnode->snap_purged_thru < realm->get_last_destroyed()) {
snaps = &realm->get_snaps();
dout(10) << " snap_purged_thru " << fnode->snap_purged_thru
<< " < " << realm->get_last_destroyed()
// snap purge?
const set<snapid_t> *snaps = NULL;
SnapRealm *realm = inode->find_snaprealm();
- if (!realm->have_past_parents_open()) {
- dout(10) << " no snap purge, one or more past parents NOT open" << dendl;
- } else if (fnode->snap_purged_thru < realm->get_last_destroyed()) {
+ if (fnode->snap_purged_thru < realm->get_last_destroyed()) {
snaps = &realm->get_snaps();
dout(10) << " snap_purged_thru " << fnode->snap_purged_thru
<< " < " << realm->get_last_destroyed()
if (!in->snaprealm) {
if (snaps)
in->purge_stale_snap_data(*snaps);
- } else if (in->snaprealm->have_past_parents_open()) {
+ } else {
in->purge_stale_snap_data(in->snaprealm->get_snaps());
}
}
if (cur_srnode) {
new_srnode = new sr_t(*cur_srnode);
- if (!new_srnode->past_parents.empty()) {
- // convert past_parents to past_parent_snaps
- ceph_assert(snaprealm);
- auto& snaps = snaprealm->get_snaps();
- for (auto p : snaps) {
- if (p >= new_srnode->current_parent_since)
- break;
- if (!new_srnode->snaps.count(p))
- new_srnode->past_parent_snaps.insert(p);
- }
- new_srnode->seq = snaprealm->get_newest_seq();
- new_srnode->past_parents.clear();
- }
- if (snaprealm)
- snaprealm->past_parents_dirty = false;
} else {
if (snapid == 0)
snapid = mdcache->get_global_snaprealm()->get_newest_seq();
if (next_snaprealm) {
dout(10) << __func__ << (early ? " (early) " : " ")
<< next_snaprealm << " seq " << next_snaprealm->seq << dendl;
- bool invalidate_cached_snaps = false;
- if (!snaprealm) {
+ if (!snaprealm)
open_snaprealm();
- } else if (!snaprealm->srnode.past_parents.empty()) {
- invalidate_cached_snaps = true;
- // re-open past parents
- snaprealm->close_parents();
- dout(10) << " realm " << *snaprealm << " past_parents " << snaprealm->srnode.past_parents
- << " -> " << next_snaprealm->past_parents << dendl;
- }
auto old_flags = snaprealm->srnode.flags;
snaprealm->srnode = *next_snaprealm;
delete next_snaprealm;
if ((snaprealm->srnode.flags ^ old_flags) & sr_t::PARENT_GLOBAL) {
- snaprealm->close_parents();
snaprealm->adjust_parent();
}
- // we should be able to open these up (or have them already be open).
- bool ok = snaprealm->_open_parents(NULL);
- ceph_assert(ok);
-
- if (invalidate_cached_snaps)
- snaprealm->invalidate_cached_snaps();
-
if (snaprealm->parent)
dout(10) << " realm " << *snaprealm << " parent " << *snaprealm->parent << dendl;
} else {
{
if (snaprealm) {
dout(15) << __func__ << " " << *snaprealm << dendl;
- snaprealm->close_parents();
if (snaprealm->parent) {
snaprealm->parent->open_children.erase(snaprealm);
//if (!nojoin)
auto old_flags = snaprealm->srnode.flags;
auto p = snapbl.cbegin();
decode(snaprealm->srnode, p);
- if (is_base()) {
- bool ok = snaprealm->_open_parents(NULL);
- ceph_assert(ok);
- } else {
+ if (!is_base()) {
if ((snaprealm->srnode.flags ^ old_flags) & sr_t::PARENT_GLOBAL) {
- snaprealm->close_parents();
snaprealm->adjust_parent();
}
}
set_callback(BACKTRACE, static_cast<Continuation::stagePtr>(&ValidationContinuation::_backtrace));
set_callback(INODE, static_cast<Continuation::stagePtr>(&ValidationContinuation::_inode_disk));
set_callback(DIRFRAGS, static_cast<Continuation::stagePtr>(&ValidationContinuation::_dirfrags));
- set_callback(SNAPREALM, static_cast<Continuation::stagePtr>(&ValidationContinuation::_snaprealm));
}
~ValidationContinuation() override {
return true;
}
- // prefetch snaprealm's past parents
- if (in->snaprealm && !in->snaprealm->have_past_parents_open())
- in->snaprealm->open_parents(nullptr);
-
C_OnFinisher *conf = new C_OnFinisher(get_io_callback(BACKTRACE),
in->mdcache->mds->finisher);
return validate_directory_data();
} else {
// TODO: validate on-disk inode for normal files
- return check_inode_snaprealm();
+ return true;
}
}
results->raw_stats.passed = true;
next:
- // snaprealm
- return check_inode_snaprealm();
- }
-
- bool check_inode_snaprealm() {
- if (!in->snaprealm)
- return true;
-
- if (!in->snaprealm->have_past_parents_open()) {
- in->snaprealm->open_parents(get_internal_callback(SNAPREALM));
- return false;
- } else {
- return immediate(SNAPREALM, 0);
- }
- }
-
- bool _snaprealm(int rval) {
-
- if (in->snaprealm->past_parents_dirty ||
- !in->get_projected_srnode()->past_parents.empty()) {
- // temporarily store error in field of on-disk inode validation temporarily
- results->inode.checked = true;
- results->inode.passed = false;
- if (in->scrub_infop->header->get_repair()) {
- results->inode.error_str << "Inode has old format snaprealm (will upgrade)";
- results->inode.repaired = true;
- in->mdcache->upgrade_inode_snaprealm(in);
- } else {
- results->inode.error_str << "Inode has old format snaprealm";
- }
- }
return true;
}
vector<inodeno_t> split_inos;
vector<inodeno_t> split_realms;
- for (elist<CInode*>::iterator p = realm->inodes_with_caps.begin(member_offset(CInode, item_caps));
- !p.end();
- ++p)
+ for (auto p = realm->inodes_with_caps.begin(); !p.end(); ++p)
split_inos.push_back((*p)->ino());
for (set<SnapRealm*>::iterator p = realm->open_children.begin();
p != realm->open_children.end();
int peer, int p_flags)
{
SnapRealm *realm = in->find_snaprealm();
- if (realm->have_past_parents_open()) {
- dout(10) << "do_cap_import " << session->info.inst.name << " mseq " << cap->get_mseq() << " on " << *in << dendl;
- if (cap->get_last_seq() == 0) // reconnected cap
- cap->inc_last_seq();
- cap->set_last_issue();
- cap->set_last_issue_stamp(ceph_clock_now());
- cap->clear_new();
- auto reap = make_message<MClientCaps>(
- CEPH_CAP_OP_IMPORT, in->ino(), realm->inode->ino(), cap->get_cap_id(),
- cap->get_last_seq(), cap->pending(), cap->wanted(), 0, cap->get_mseq(),
- mds->get_osd_epoch_barrier());
- in->encode_cap_message(reap, cap);
- reap->snapbl = realm->get_snap_trace();
- reap->set_cap_peer(p_cap_id, p_seq, p_mseq, peer, p_flags);
- mds->send_message_client_counted(reap, session);
- } else {
- ceph_abort();
- }
+ dout(10) << "do_cap_import " << session->info.inst.name << " mseq " << cap->get_mseq() << " on " << *in << dendl;
+ if (cap->get_last_seq() == 0) // reconnected cap
+ cap->inc_last_seq();
+ cap->set_last_issue();
+ cap->set_last_issue_stamp(ceph_clock_now());
+ cap->clear_new();
+ auto reap = make_message<MClientCaps>(CEPH_CAP_OP_IMPORT,
+ in->ino(), realm->inode->ino(), cap->get_cap_id(),
+ cap->get_last_seq(), cap->pending(), cap->wanted(),
+ 0, cap->get_mseq(), mds->get_osd_epoch_barrier());
+ in->encode_cap_message(reap, cap);
+ reap->snapbl = realm->get_snap_trace();
+ reap->set_cap_peer(p_cap_id, p_seq, p_mseq, peer, p_flags);
+ mds->send_message_client_counted(reap, session);
}
void MDCache::do_delayed_cap_imports()
{
dout(10) << "open_snaprealms" << dendl;
- MDSGatherBuilder gather(g_ceph_context);
-
auto it = rejoin_pending_snaprealms.begin();
while (it != rejoin_pending_snaprealms.end()) {
CInode *in = *it;
SnapRealm *realm = in->snaprealm;
ceph_assert(realm);
- if (realm->have_past_parents_open() ||
- realm->open_parents(gather.new_sub())) {
- dout(10) << " past parents now open on " << *in << dendl;
-
- map<client_t,ref_t<MClientSnap>> splits;
- // finish off client snaprealm reconnects?
- map<inodeno_t,map<client_t,snapid_t> >::iterator q = reconnected_snaprealms.find(in->ino());
- if (q != reconnected_snaprealms.end()) {
- for (const auto& r : q->second)
- finish_snaprealm_reconnect(r.first, realm, r.second, splits);
- reconnected_snaprealms.erase(q);
- }
-
- for (elist<CInode*>::iterator p = realm->inodes_with_caps.begin(member_offset(CInode, item_caps));
- !p.end(); ++p) {
- CInode *child = *p;
- auto q = reconnected_caps.find(child->ino());
- ceph_assert(q != reconnected_caps.end());
- for (auto r = q->second.begin(); r != q->second.end(); ++r) {
- Capability *cap = child->get_client_cap(r->first);
- if (!cap)
- continue;
- if (r->second.snap_follows > 0) {
- if (r->second.snap_follows < child->first - 1) {
- rebuild_need_snapflush(child, realm, r->first, r->second.snap_follows);
- } else if (r->second.snapflush) {
- // When processing a cap flush message that is re-sent, it's possble
- // that the sender has already released all WR caps. So we should
- // force MDCache::cow_inode() to setup CInode::client_need_snapflush.
- cap->mark_needsnapflush();
- }
- }
- // make sure client's cap is in the correct snaprealm.
- if (r->second.realm_ino != in->ino()) {
- prepare_realm_split(realm, r->first, child->ino(), splits);
+
+ map<client_t,ref_t<MClientSnap>> splits;
+ // finish off client snaprealm reconnects?
+ auto q = reconnected_snaprealms.find(in->ino());
+ if (q != reconnected_snaprealms.end()) {
+ for (const auto& r : q->second)
+ finish_snaprealm_reconnect(r.first, realm, r.second, splits);
+ reconnected_snaprealms.erase(q);
+ }
+
+ for (auto p = realm->inodes_with_caps.begin(); !p.end(); ++p) {
+ CInode *child = *p;
+ auto q = reconnected_caps.find(child->ino());
+ ceph_assert(q != reconnected_caps.end());
+ for (auto r = q->second.begin(); r != q->second.end(); ++r) {
+ Capability *cap = child->get_client_cap(r->first);
+ if (!cap)
+ continue;
+ if (r->second.snap_follows > 0) {
+ if (r->second.snap_follows < child->first - 1) {
+ rebuild_need_snapflush(child, realm, r->first, r->second.snap_follows);
+ } else if (r->second.snapflush) {
+ // When processing a cap flush message that is re-sent, it's possible
+ // that the sender has already released all WR caps. So we should
+ // force MDCache::cow_inode() to setup CInode::client_need_snapflush.
+ cap->mark_needsnapflush();
}
}
+ // make sure client's cap is in the correct snaprealm.
+ if (r->second.realm_ino != in->ino()) {
+ prepare_realm_split(realm, r->first, child->ino(), splits);
+ }
}
-
- rejoin_pending_snaprealms.erase(it++);
- in->put(CInode::PIN_OPENINGSNAPPARENTS);
-
- send_snaps(splits);
- } else {
- dout(10) << " opening past parents on " << *in << dendl;
- ++it;
}
- }
- if (gather.has_subs()) {
- if (gather.num_subs_remaining() == 0) {
- // cleanup gather
- gather.set_finisher(new C_MDSInternalNoop);
- gather.activate();
- } else {
- // for multimds, must succeed the first time
- ceph_assert(recovery_set.empty());
+ rejoin_pending_snaprealms.erase(it++);
+ in->put(CInode::PIN_OPENINGSNAPPARENTS);
- dout(10) << "open_snaprealms - waiting for "
- << gather.num_subs_remaining() << dendl;
- gather.set_finisher(new C_MDC_OpenSnapRealms(this));
- gather.activate();
- return;
- }
+ send_snaps(splits);
}
notify_global_snaprealm_update(CEPH_SNAP_OP_UPDATE);
if (cur->state_test(CInode::STATE_PURGING))
return -ESTALE;
- // make sure snaprealm are open...
- if (mdr && cur->snaprealm && !cur->snaprealm->have_past_parents_open() &&
- !cur->snaprealm->open_parents(cf.build())) {
- return 1;
- }
-
if (flags & MDS_TRAVERSE_CHECK_LOCKCACHE)
mds->locker->find_and_attach_lock_cache(mdr, cur);
}
cur = in;
- // make sure snaprealm are open...
- if (mdr && cur->snaprealm && !cur->snaprealm->have_past_parents_open() &&
- !cur->snaprealm->open_parents(cf.build())) {
- return 1;
- }
if (rdlock_snap && !(want_dentry && depth == path.depth() - 1)) {
lov.clear();
case CEPH_MDS_OP_REPAIR_INODESTATS:
repair_inode_stats_work(mdr);
break;
- case CEPH_MDS_OP_UPGRADE_SNAPREALM:
- upgrade_inode_snaprealm_work(mdr);
- break;
default:
ceph_abort();
}
vector<inodeno_t> split_realms;
if (notify_clients) {
- ceph_assert(in->snaprealm->have_past_parents_open());
if (snapop == CEPH_SNAP_OP_SPLIT) {
// notify clients of update|split
- for (elist<CInode*>::iterator p = in->snaprealm->inodes_with_caps.begin(member_offset(CInode, item_caps));
- !p.end(); ++p)
+ for (auto p = in->snaprealm->inodes_with_caps.begin(); !p.end(); ++p)
split_inos.push_back((*p)->ino());
- for (set<SnapRealm*>::iterator p = in->snaprealm->open_children.begin();
- p != in->snaprealm->open_children.end();
- ++p)
- split_realms.push_back((*p)->inode->ino());
+ for (auto& r : in->snaprealm->open_children)
+ split_realms.push_back(r->inode->ino());
}
}
- set<SnapRealm*> past_children;
map<client_t, ref_t<MClientSnap>> updates;
list<SnapRealm*> q;
q.push_back(in->snaprealm);
}
}
- if (snapop == CEPH_SNAP_OP_UPDATE || snapop == CEPH_SNAP_OP_DESTROY) {
- for (set<SnapRealm*>::iterator p = realm->open_past_children.begin();
- p != realm->open_past_children.end();
- ++p)
- past_children.insert(*p);
- }
-
// notify for active children, too.
dout(10) << " " << realm << " open_children are " << realm->open_children << dendl;
- for (set<SnapRealm*>::iterator p = realm->open_children.begin();
- p != realm->open_children.end();
- ++p)
- q.push_back(*p);
+ for (auto& r : realm->open_children)
+ q.push_back(r);
}
if (notify_clients)
send_snaps(updates);
-
- // notify past children and their descendants if we update/delete old snapshots
- for (set<SnapRealm*>::iterator p = past_children.begin();
- p != past_children.end();
- ++p)
- q.push_back(*p);
-
- while (!q.empty()) {
- SnapRealm *realm = q.front();
- q.pop_front();
-
- realm->invalidate_cached_snaps();
-
- for (set<SnapRealm*>::iterator p = realm->open_children.begin();
- p != realm->open_children.end();
- ++p) {
- if (past_children.count(*p) == 0)
- q.push_back(*p);
- }
-
- for (set<SnapRealm*>::iterator p = realm->open_past_children.begin();
- p != realm->open_past_children.end();
- ++p) {
- if (past_children.count(*p) == 0) {
- q.push_back(*p);
- past_children.insert(*p);
- }
- }
- }
-
- if (snapop == CEPH_SNAP_OP_DESTROY) {
- // eval stray inodes if we delete snapshot from their past ancestor snaprealm
- for (set<SnapRealm*>::iterator p = past_children.begin();
- p != past_children.end();
- ++p)
- maybe_eval_stray((*p)->inode, true);
- }
}
void MDCache::send_snap_update(CInode *in, version_t stid, int snap_op)
bool hadrealm = (in->snaprealm ? true : false);
in->decode_snap_blob(m->snapbl);
ceph_assert(in->snaprealm);
- ceph_assert(in->snaprealm->have_past_parents_open());
if (!hadrealm)
do_realm_invalidate_and_update_notify(in, CEPH_SNAP_OP_SPLIT, false);
}
header->set_origin(in);
- Context *fin;
- if (header->get_recursive()) {
- header->get_origin()->get(CInode::PIN_SCRUBQUEUE);
- fin = new MDSInternalContextWrapper(mds,
- new LambdaContext([this, header](int r) {
- recursive_scrub_finish(header);
- header->get_origin()->put(CInode::PIN_SCRUBQUEUE);
- })
- );
- } else {
+ Context *fin = nullptr;
+ if (!header->get_recursive())
fin = cs->take_finisher();
- }
// If the scrub did some repair, then flush the journal at the end of
// the scrub. Otherwise in the case of e.g. rewriting a backtrace
return;
}
-void MDCache::recursive_scrub_finish(const ScrubHeaderRef& header)
-{
- if (header->get_origin()->is_base() &&
- header->get_force() && header->get_repair()) {
- // notify snapserver that base directory is recursively scrubbed.
- // After both root and mdsdir are recursively scrubbed, snapserver
- // knows that all old format snaprealms are converted to the new
- // format.
- if (mds->mdsmap->get_num_in_mds() == 1 &&
- mds->mdsmap->get_num_failed_mds() == 0 &&
- mds->mdsmap->get_tableserver() == mds->get_nodeid()) {
- mds->mark_base_recursively_scrubbed(header->get_origin()->ino());
- }
- }
-}
-
struct C_MDC_RespondInternalRequest : public MDCacheLogContext {
MDRequestRef mdr;
C_MDC_RespondInternalRequest(MDCache *c, MDRequestRef& m) :
mds->server->respond_to_request(mdr, 0);
}
-void MDCache::upgrade_inode_snaprealm(CInode *in)
-{
- MDRequestRef mdr = request_start_internal(CEPH_MDS_OP_UPGRADE_SNAPREALM);
- mdr->pin(in);
- mdr->internal_op_private = in;
- mdr->internal_op_finish = new C_MDSInternalNoop;
- upgrade_inode_snaprealm_work(mdr);
-}
-
-void MDCache::upgrade_inode_snaprealm_work(MDRequestRef& mdr)
-{
- CInode *in = static_cast<CInode*>(mdr->internal_op_private);
- dout(10) << __func__ << " " << *in << dendl;
-
- if (!in->is_auth()) {
- mds->server->respond_to_request(mdr, -ESTALE);
- return;
- }
-
- MutationImpl::LockOpVec lov;
- lov.add_xlock(&in->snaplock);
- if (!mds->locker->acquire_locks(mdr, lov))
- return;
-
- // project_snaprealm() upgrades snaprealm format
- auto pi = in->project_inode(mdr, false, true);
- pi.inode->version = in->pre_dirty();
-
- mdr->ls = mds->mdlog->get_current_segment();
- EUpdate *le = new EUpdate(mds->mdlog, "upgrade_snaprealm");
- mds->mdlog->start_entry(le);
-
- if (in->is_base()) {
- le->metablob.add_root(true, in);
- } else {
- CDentry *pdn = in->get_projected_parent_dn();
- le->metablob.add_dir_context(pdn->get_dir());
- le->metablob.add_primary_dentry(pdn, in, true);
- }
-
- mds->mdlog->submit_entry(le, new C_MDC_RespondInternalRequest(this, mdr));
-}
-
void MDCache::flush_dentry(std::string_view path, Context *fin)
{
if (is_readonly()) {
Formatter *f, Context *fin);
void repair_inode_stats(CInode *diri);
void repair_dirfrag_stats(CDir *dir);
- void upgrade_inode_snaprealm(CInode *in);
// my leader
MDSRank *mds;
* long time)
*/
void enqueue_scrub_work(MDRequestRef& mdr);
- void recursive_scrub_finish(const ScrubHeaderRef& header);
void repair_inode_stats_work(MDRequestRef& mdr);
void repair_dirfrag_stats_work(MDRequestRef& mdr);
- void upgrade_inode_snaprealm_work(MDRequestRef& mdr);
ceph::unordered_map<inodeno_t,CInode*> inode_map; // map of head inodes by ino
map<vinodeno_t, CInode*> snap_inode_map; // map of snap inodes by ino
already_sent = true;
}
-void MDSRank::mark_base_recursively_scrubbed(inodeno_t ino)
-{
- if (mdsmap->get_tableserver() == whoami)
- snapserver->mark_base_recursively_scrubbed(ino);
-}
-
void MDSRankDispatcher::tick()
{
heartbeat_reset();
const std::string& option, const std::string& value,
std::ostream& ss);
- void mark_base_recursively_scrubbed(inodeno_t ino);
-
// Reference to global MDS::mds_lock, so that users of MDSRank don't
// carry around references to the outer MDS, and we can substitute
// a separate lock here in future potentially.
return;
}
- if (mdr && in->snaprealm && !in->snaprealm->have_past_parents_open() &&
- !in->snaprealm->open_parents(new C_MDS_RetryRequest(mdcache, mdr))) {
- return;
- }
-
// check for nothing (not read or write); this still applies the
// path check.
if (!check_access(mdr, in, 0))
new_realm = !in->snaprealm;
in->decode_snap_blob(mdr->peer_request->desti_snapbl);
ceph_assert(in->snaprealm);
- ceph_assert(in->snaprealm->have_past_parents_open());
} else {
new_realm = false;
}
new_oldin_snaprealm = !oldin->snaprealm;
oldin->decode_snap_blob(mdr->peer_request->desti_snapbl);
ceph_assert(oldin->snaprealm);
- ceph_assert(oldin->snaprealm->have_past_parents_open());
}
}
new_in_snaprealm = !in->snaprealm;
in->decode_snap_blob(mdr->peer_request->srci_snapbl);
ceph_assert(in->snaprealm);
- ceph_assert(in->snaprealm->have_past_parents_open());
}
}
}
respond_to_request(mdr, 0);
// purge snapshot data
- if (diri->snaprealm->have_past_parents_open())
- diri->purge_stale_snap_data(diri->snaprealm->get_snaps());
+ diri->purge_stale_snap_data(diri->snaprealm->get_snaps());
}
struct C_MDS_renamesnap_finish : public ServerLogContext {
out << " snaps=" << realm.srnode.snaps;
out << " past_parent_snaps=" << realm.srnode.past_parent_snaps;
- if (realm.srnode.past_parents.size()) {
- out << " past_parents=(";
- for (map<snapid_t, snaplink_t>::const_iterator p = realm.srnode.past_parents.begin();
- p != realm.srnode.past_parents.end();
- ++p) {
- if (p != realm.srnode.past_parents.begin()) out << ",";
- out << p->second.first << "-" << p->first
- << "=" << p->second.ino;
- }
- out << ")";
- }
-
if (realm.srnode.is_parent_global())
out << " global ";
out << " " << &realm << ")";
}
SnapRealm::SnapRealm(MDCache *c, CInode *in) :
- mdcache(c), inode(in)
+ mdcache(c), inode(in), inodes_with_caps(member_offset(CInode, item_caps))
{
global = (inode->ino() == MDS_INO_GLOBAL_SNAPREALM);
}
-void SnapRealm::add_open_past_parent(SnapRealm *parent, snapid_t last)
-{
- auto p = open_past_parents.find(parent->inode->ino());
- if (p != open_past_parents.end()) {
- ceph_assert(p->second.second.count(last) == 0);
- p->second.second.insert(last);
- } else {
- open_past_parents[parent->inode->ino()].first = parent;
- open_past_parents[parent->inode->ino()].second.insert(last);
- parent->open_past_children.insert(this);
- parent->inode->get(CInode::PIN_PASTSNAPPARENT);
- }
- ++num_open_past_parents;
-}
-
-void SnapRealm::remove_open_past_parent(inodeno_t ino, snapid_t last)
-{
- auto p = open_past_parents.find(ino);
- ceph_assert(p != open_past_parents.end());
- auto q = p->second.second.find(last);
- ceph_assert(q != p->second.second.end());
- p->second.second.erase(q);
- --num_open_past_parents;
- if (p->second.second.empty()) {
- SnapRealm *parent = p->second.first;
- open_past_parents.erase(p);
- parent->open_past_children.erase(this);
- parent->inode->put(CInode::PIN_PASTSNAPPARENT);
- }
-}
-
-struct C_SR_RetryOpenParents : public MDSContext {
- SnapRealm *sr;
- snapid_t first, last, parent_last;
- inodeno_t parent;
- MDSContext* fin;
- C_SR_RetryOpenParents(SnapRealm *s, snapid_t f, snapid_t l, snapid_t pl,
- inodeno_t p, MDSContext *c) :
- sr(s), first(f), last(l), parent_last(pl), parent(p), fin(c) {
- sr->inode->get(CInode::PIN_OPENINGSNAPPARENTS);
- }
- MDSRank *get_mds() override { return sr->mdcache->mds; }
- void finish(int r) override {
- if (r < 0)
- sr->_remove_missing_parent(parent_last, parent, r);
- if (sr->_open_parents(fin, first, last)) {
- if (fin)
- fin->complete(0);
- }
- sr->inode->put(CInode::PIN_OPENINGSNAPPARENTS);
- }
-};
-
-void SnapRealm::_remove_missing_parent(snapid_t snapid, inodeno_t parent, int err)
-{
- map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.find(snapid);
- if (p != srnode.past_parents.end()) {
- dout(10) << __func__ << " " << parent << " [" << p->second.first << ","
- << p->first << "] errno " << err << dendl;
- srnode.past_parents.erase(p);
- past_parents_dirty = true;
- } else {
- dout(10) << __func__ << " " << parent << " not found" << dendl;
- }
-}
-
-bool SnapRealm::_open_parents(MDSContext *finish, snapid_t first, snapid_t last)
-{
- dout(10) << "open_parents [" << first << "," << last << "]" << dendl;
- if (open)
- return true;
-
- // make sure my current parents' parents are open...
- if (parent) {
- dout(10) << " current parent [" << srnode.current_parent_since << ",head] is " << *parent
- << " on " << *parent->inode << dendl;
- if (last >= srnode.current_parent_since &&
- !parent->_open_parents(finish, std::max(first, srnode.current_parent_since), last))
- return false;
- }
-
- if (!srnode.past_parent_snaps.empty())
- ceph_assert(mdcache->mds->snapclient->get_cached_version() > 0);
-
- if (!srnode.past_parents.empty() &&
- mdcache->mds->allows_multimds_snaps()) {
- dout(10) << " skip non-empty past_parents since multimds_snaps is allowed" << dendl;
- open = true;
- return true;
- }
-
- // and my past parents too!
- ceph_assert(srnode.past_parents.size() >= num_open_past_parents);
- if (srnode.past_parents.size() > num_open_past_parents) {
- for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.begin();
- p != srnode.past_parents.end(); ) {
- dout(10) << " past_parent [" << p->second.first << "," << p->first << "] is "
- << p->second.ino << dendl;
- CInode *parent = mdcache->get_inode(p->second.ino);
- if (!parent) {
- C_SR_RetryOpenParents *fin = new C_SR_RetryOpenParents(this, first, last, p->first,
- p->second.ino, finish);
- mdcache->open_ino(p->second.ino, mdcache->mds->mdsmap->get_metadata_pool(), fin);
- return false;
- }
- if (parent->state_test(CInode::STATE_PURGING)) {
- dout(10) << " skip purging past_parent " << *parent << dendl;
- srnode.past_parents.erase(p++);
- past_parents_dirty = true;
- continue;
- }
- ceph_assert(parent->snaprealm); // hmm!
- if (!parent->snaprealm->_open_parents(finish, p->second.first, p->first))
- return false;
- auto q = open_past_parents.find(p->second.ino);
- if (q == open_past_parents.end() ||
- q->second.second.count(p->first) == 0) {
- add_open_past_parent(parent->snaprealm, p->first);
- }
- ++p;
- }
- }
-
- open = true;
- return true;
-}
-
-bool SnapRealm::open_parents(MDSContext *retryorfinish) {
- if (!_open_parents(retryorfinish))
- return false;
- delete retryorfinish;
- return true;
-}
-
-bool SnapRealm::have_past_parents_open(snapid_t first, snapid_t last) const
-{
- dout(10) << "have_past_parents_open [" << first << "," << last << "]" << dendl;
- if (open)
- return true;
-
- if (!srnode.past_parent_snaps.empty())
- ceph_assert(mdcache->mds->snapclient->get_cached_version() > 0);
-
- if (!srnode.past_parents.empty() &&
- mdcache->mds->allows_multimds_snaps()) {
- dout(10) << " skip non-empty past_parents since multimds_snaps is allowed" << dendl;
- open = true;
- return true;
- }
-
- for (auto p = srnode.past_parents.lower_bound(first);
- p != srnode.past_parents.end();
- ++p) {
- if (p->second.first > last)
- break;
- dout(10) << " past parent [" << p->second.first << "," << p->first << "] was "
- << p->second.ino << dendl;
- auto q = open_past_parents.find(p->second.ino);
- if (q == open_past_parents.end()) {
- dout(10) << " past parent " << p->second.ino << " is not open" << dendl;
- return false;
- }
- SnapRealm *parent_realm = q->second.first;
- if (!parent_realm->have_past_parents_open(std::max(first, p->second.first),
- std::min(last, p->first)))
- return false;
- }
-
- open = true;
- return true;
-}
-
-void SnapRealm::close_parents()
-{
- for (auto p = open_past_parents.begin(); p != open_past_parents.end(); ++p) {
- num_open_past_parents -= p->second.second.size();
- p->second.first->inode->put(CInode::PIN_PASTSNAPPARENT);
- p->second.first->open_past_children.erase(this);
- }
- open_past_parents.clear();
-}
-
-
/*
* get list of snaps for this realm. we must include parents' snaps
* for the intervals during which they were our parent.
cached_last_created = std::max(cached_last_created, last);
}
cached_snaps.insert(snaps.begin(), snaps.end());
- } else {
- // include snaps for parents
- for (const auto& p : srnode.past_parents) {
- const CInode *oldparent = mdcache->get_inode(p.second.ino);
- ceph_assert(oldparent); // call open_parents first!
- ceph_assert(oldparent->snaprealm);
-
- const set<snapid_t>& snaps = oldparent->snaprealm->get_snaps();
- snapid_t last = 0;
- for (auto q = snaps.lower_bound(p.second.first);
- q != snaps.end() && *q <= p.first;
- q++) {
- cached_snaps.insert(*q);
- last = *q;
- }
- cached_seq = std::max(cached_seq, last);
- cached_last_created = std::max(cached_last_created, last);
- }
}
snapid_t parent_seq = parent ? parent->get_newest_seq() : snapid_t(0);
void SnapRealm::check_cache() const
{
- ceph_assert(have_past_parents_open());
snapid_t seq;
snapid_t last_created;
snapid_t last_destroyed = mdcache->mds->snapclient->get_last_destroyed();
map<snapid_t, const SnapInfo*> _infomap;
mdcache->mds->snapclient->get_snap_infos(_infomap, snaps);
infomap.insert(_infomap.begin(), _infomap.end());
- } else {
- // include snaps for parents during intervals that intersect [first,last]
- for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.lower_bound(first);
- p != srnode.past_parents.end() && p->first >= first && p->second.first <= last;
- ++p) {
- CInode *oldparent = mdcache->get_inode(p->second.ino);
- ceph_assert(oldparent); // call open_parents first!
- ceph_assert(oldparent->snaprealm);
- oldparent->snaprealm->get_snap_info(infomap,
- std::max(first, p->second.first),
- std::min(last, p->first));
- }
}
if (srnode.current_parent_since <= last && parent)
return sinfo->get_long_name();
}
}
- } else {
- map<snapid_t,snaplink_t>::iterator p = srnode.past_parents.lower_bound(snapid);
- if (p != srnode.past_parents.end() && p->second.first <= snapid) {
- CInode *oldparent = mdcache->get_inode(p->second.ino);
- ceph_assert(oldparent); // call open_parents first!
- ceph_assert(oldparent->snaprealm);
- return oldparent->snaprealm->get_snapname(snapid, atino);
- }
}
ceph_assert(srnode.current_parent_since <= snapid);
if (!actual && it.second->name == pname && it.second->ino == pino)
return it.first;
}
- } else {
- // include snaps for parents during intervals that intersect [first,last]
- for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.lower_bound(first);
- p != srnode.past_parents.end() && p->first >= first && p->second.first <= last;
- ++p) {
- CInode *oldparent = mdcache->get_inode(p->second.ino);
- ceph_assert(oldparent); // call open_parents first!
- ceph_assert(oldparent->snaprealm);
- snapid_t r = oldparent->snaprealm->resolve_snapname(n, atino,
- std::max(first, p->second.first),
- std::min(last, p->first));
- if (r)
- return r;
- }
}
if (parent && srnode.current_parent_since <= last)
}
// split inodes_with_caps
- for (elist<CInode*>::iterator p = inodes_with_caps.begin(member_offset(CInode, item_caps));
- !p.end(); ) {
+ for (auto p = inodes_with_caps.begin(); !p.end(); ) {
CInode *in = *p;
++p;
// does inode fall within the child realm?
newparent = parent;
dout(10) << "merge to " << *newparent << " on " << *newparent->inode << dendl;
- ceph_assert(open_past_children.empty());
-
dout(10) << " open_children are " << open_children << dendl;
for (auto realm : open_children) {
dout(20) << " child realm " << *realm << " on " << *realm->inode << dendl;
}
open_children.clear();
- elist<CInode*>::iterator p = inodes_with_caps.begin(member_offset(CInode, item_caps));
- while (!p.end()) {
+ for (auto p = inodes_with_caps.begin(); !p.end(); ) {
CInode *in = *p;
++p;
in->move_to_realm(newparent);
auto p = past.lower_bound(srnode.current_parent_since);
past.erase(p, past.end());
}
- } else if (!srnode.past_parents.empty()) {
- const set<snapid_t>& snaps = get_snaps();
- for (const auto& p : srnode.past_parents) {
- for (auto q = snaps.lower_bound(p.second.first);
- q != snaps.end() && *q <= p.first;
- q++) {
- if (srnode.snaps.count(*q))
- continue;
- past.insert(*q);
- }
- }
}
if (!past.empty()) {
cached_snap_trace.append(parent->get_snap_trace());
}
-void SnapRealm::prune_past_parents()
+void SnapRealm::prune_past_parent_snaps()
{
- dout(10) << "prune_past_parents" << dendl;
+ dout(10) << __func__ << dendl;
check_cache();
- // convert past_parents to past_parent_snaps
- if (!srnode.past_parents.empty()) {
- for (auto p = cached_snaps.begin();
- p != cached_snaps.end() && *p < srnode.current_parent_since;
- ++p) {
- if (!srnode.snaps.count(*p))
- srnode.past_parent_snaps.insert(*p);
- }
- srnode.past_parents.clear();
- past_parents_dirty = true;
- }
-
for (auto p = srnode.past_parent_snaps.begin();
p != srnode.past_parent_snaps.end(); ) {
auto q = cached_snaps.find(*p);
if (q == cached_snaps.end()) {
- dout(10) << "prune_past_parents pruning " << *p << dendl;
+ dout(10) << __func__ << " pruning " << *p << dendl;
srnode.past_parent_snaps.erase(p++);
} else {
- dout(10) << "prune_past_parents keeping " << *p << dendl;
+ dout(10) << __func__ << " keeping " << *p << dendl;
++p;
}
}
return false;
}
- bool _open_parents(MDSContext *retryorfinish, snapid_t first=1, snapid_t last=CEPH_NOSNAP);
- bool open_parents(MDSContext *retryorfinish);
- void _remove_missing_parent(snapid_t snapid, inodeno_t parent, int err);
- bool have_past_parents_open(snapid_t first=1, snapid_t last=CEPH_NOSNAP) const;
- void add_open_past_parent(SnapRealm *parent, snapid_t last);
- void remove_open_past_parent(inodeno_t ino, snapid_t last);
- void close_parents();
-
- void prune_past_parents();
- bool has_past_parents() const {
- return !srnode.past_parent_snaps.empty() ||
- !srnode.past_parents.empty();
+ void prune_past_parent_snaps();
+ bool has_past_parent_snaps() const {
+ return !srnode.past_parent_snaps.empty();
}
void build_snap_set() const;
MDCache *mdcache;
CInode *inode;
- bool past_parents_dirty = false;
-
SnapRealm *parent = nullptr;
std::set<SnapRealm*> open_children; // active children that are currently open
- std::set<SnapRealm*> open_past_children; // past children who has pinned me
- elist<CInode*> inodes_with_caps = 0; // for efficient realm splits
+ elist<CInode*> inodes_with_caps; // for efficient realm splits
std::map<client_t, xlist<Capability*>* > client_caps; // to identify clients who need snap notifications
protected:
void check_cache() const;
private:
- mutable bool open = false; // set to true once all past_parents are opened
bool global;
- std::map<inodeno_t, std::pair<SnapRealm*, std::set<snapid_t>>> open_past_parents; // these are explicitly pinned.
- unsigned num_open_past_parents = 0;
-
// cache
mutable snapid_t cached_seq; // max seq over self and all past+present parents.
mutable snapid_t cached_last_created; // max last_created over all past+present parents
void check_osd_map(bool force);
- void mark_base_recursively_scrubbed(inodeno_t ino) {
- if (ino == MDS_INO_ROOT)
- root_scrubbed = true;
- else if (ino == MDS_INO_MDSDIR(rank))
- mdsdir_scrubbed = true;
- else
- ceph_abort();
- }
bool can_allow_multimds_snaps() const {
- return (root_scrubbed && mdsdir_scrubbed) ||
- snaps.empty() || snaps.begin()->first >= snaprealm_v2_since;
+ return snaps.empty() || snaps.begin()->first >= snaprealm_v2_since;
}
void encode(bufferlist& bl) const {
set<version_t> pending_noop;
version_t last_checked_osdmap = 0;
-
- bool root_scrubbed = false; // all snaprealms under root are converted?
- bool mdsdir_scrubbed = false; // all snaprealms under ~mds0 are converted?
};
WRITE_CLASS_ENCODER(SnapServer)
return;
}
- CInode *in = dn->get_linkage()->get_inode();
- if (in->snaprealm &&
- !in->snaprealm->have_past_parents_open() &&
- !in->snaprealm->open_parents(new C_RetryEnqueue(this, dn, trunc))) {
- // this can happen if the dentry had been trimmed from cache.
- return;
- }
-
dn->get_dir()->auth_pin(this);
-
if (trunc) {
truncate(dn);
} else {
// only important for directories. normal file data snaps are handled
// by the object store.
if (in->snaprealm) {
- if (!in->snaprealm->have_past_parents_open() &&
- !in->snaprealm->open_parents(new C_MDC_EvalStray(this, dn))) {
- return false;
- }
- in->snaprealm->prune_past_parents();
+ in->snaprealm->prune_past_parent_snaps();
in->purge_stale_snap_data(in->snaprealm->get_snaps());
}
if (in->is_dir()) {
- if (in->snaprealm && in->snaprealm->has_past_parents()) {
+ if (in->snaprealm && in->snaprealm->has_past_parent_snaps()) {
dout(20) << " directory has past parents "
<< in->snaprealm << dendl;
if (in->state_test(CInode::STATE_MISSINGOBJS)) {
return false;
}
// don't purge multiversion inode with snap data
- if (in->snaprealm && in->snaprealm->has_past_parents() &&
+ if (in->snaprealm && in->snaprealm->has_past_parent_snaps() &&
in->is_any_old_inodes()) {
// A file with snapshots: we will truncate the HEAD revision
// but leave the metadata intact.