this avoid copying whole inode_t and xattr map when journaling inodes.
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
if (linkage.is_primary()) {
c = 1;
encode(c, bl);
- encode(linkage.get_inode()->inode.ino, bl);
+ encode(linkage.get_inode()->ino(), bl);
}
else if (linkage.is_remote()) {
c = 2;
CDentry::linkage_t *dnl = i->second->get_linkage();
if (dnl->is_primary()) {
CInode *in = dnl->get_inode();
- nest_info.add(in->inode.accounted_rstat);
+ nest_info.add(in->get_inode()->accounted_rstat);
if (in->is_dir())
frag_info.nsubdirs++;
else
CDentry *dn = i->second;
if (dn->get_linkage()->is_primary()) {
CInode *in = dn->get_linkage()->inode;
- dout(1) << *dn << " rstat " << in->inode.accounted_rstat << dendl;
+ dout(1) << *dn << " rstat " << in->get_inode()->accounted_rstat << dendl;
} else {
dout(1) << *dn << dendl;
}
if (dn->get_linkage()->is_primary()) {
CInode *in = dn->get_linkage()->get_inode();
- auto pi = in->get_projected_inode();
+ const auto& pi = in->get_projected_inode();
if (in->is_dir()) {
fnode.fragstat.nsubdirs++;
if (in->item_pop_lru.is_on_list())
void CDir::resync_accounted_fragstat()
{
fnode_t *pf = get_projected_fnode();
- auto pi = inode->get_projected_inode();
+ const auto& pi = inode->get_projected_inode();
if (pf->accounted_fragstat.version != pi->dirstat.version) {
pf->fragstat.version = pi->dirstat.version;
void CDir::resync_accounted_rstat()
{
fnode_t *pf = get_projected_fnode();
- auto pi = inode->get_projected_inode();
+ const auto& pi = inode->get_projected_inode();
if (pf->accounted_rstat.version != pi->rstat.version) {
pf->rstat.version = pi->rstat.version;
if (in->is_frozen())
continue;
- auto &pi = in->project_inode();
- pi.inode.version = in->pre_dirty();
+ auto pi = in->project_inode();
+ pi.inode->version = in->pre_dirty();
inode->mdcache->project_rstat_inode_to_frag(in, this, 0, 0, NULL);
}
undef_inode = true;
} else if (committed_version == 0 &&
dn->is_dirty() &&
- inode_data.inode.ino == in->ino() &&
- inode_data.inode.version == in->get_version()) {
+ inode_data.inode->ino == in->ino() &&
+ inode_data.inode->version == in->get_version()) {
/* clean underwater item?
* Underwater item is something that is dirty in our cache from
* journal replay, but was previously flushed to disk before the
if (!dn || undef_inode) {
// add inode
- CInode *in = cache->get_inode(inode_data.inode.ino, last);
+ CInode *in = cache->get_inode(inode_data.inode->ino, last);
if (!in || undef_inode) {
if (undef_inode && in)
in->first = first;
else
in = new CInode(cache, true, first, last);
- in->inode = inode_data.inode;
+ in->reset_inode(std::move(inode_data.inode));
+ in->reset_xattrs(std::move(inode_data.xattrs));
// symlink?
if (in->is_symlink())
in->symlink = inode_data.symlink;
in->dirfragtree.swap(inode_data.dirfragtree);
- in->xattrs.swap(inode_data.xattrs);
- in->old_inodes.swap(inode_data.old_inodes);
- if (!in->old_inodes.empty()) {
- snapid_t min_first = in->old_inodes.rbegin()->first + 1;
+ in->reset_old_inodes(std::move(inode_data.old_inodes));
+ if (in->is_any_old_inodes()) {
+ snapid_t min_first = in->get_old_inodes()->rbegin()->first + 1;
if (min_first > in->first)
in->first = min_first;
}
}
dout(12) << "_fetched got " << *dn << " " << *in << dendl;
- if (in->inode.is_dirty_rstat())
+ if (in->get_inode()->is_dirty_rstat())
in->mark_dirty_rstat();
in->maybe_ephemeral_rand(true, rand_threshold);
dn = add_primary_dentry(dname, in, first, last);
} else {
dout(0) << "_fetched badness: got (but i already had) " << *in
- << " mode " << in->inode.mode
- << " mtime " << in->inode.mtime << dendl;
+ << " mode " << in->get_inode()->mode
+ << " mtime " << in->get_inode()->mtime << dendl;
string dirpath, inopath;
this->inode->make_path_string(dirpath);
in->make_path_string(inopath);
- cache->mds->clog->error() << "loaded dup inode " << inode_data.inode.ino
- << " [" << first << "," << last << "] v" << inode_data.inode.version
+ cache->mds->clog->error() << "loaded dup inode " << inode_data.inode->ino
+ << " [" << first << "," << last << "] v" << inode_data.inode->version
<< " at " << dirpath << "/" << dname
- << ", but inode " << in->vino() << " v" << in->inode.version
+ << ", but inode " << in->vino() << " v" << in->get_version()
<< " already exists at " << inopath;
return dn;
}
#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_mds
#undef dout_prefix
-#define dout_prefix *_dout << "mds." << mdcache->mds->get_nodeid() << ".cache.ino(" << inode.ino << ") "
+#define dout_prefix *_dout << "mds." << mdcache->mds->get_nodeid() << ".cache.ino(" << ino() << ") "
class CInodeIOContext : public MDSIOContextBase
//int cinode_pins[CINODE_NUM_PINS]; // counts
ostream& CInode::print_db_line_prefix(ostream& out)
{
- return out << ceph_clock_now() << " mds." << mdcache->mds->get_nodeid() << ".cache.ino(" << inode.ino << ") ";
+ return out << ceph_clock_now() << " mds." << mdcache->mds->get_nodeid() << ".cache.ino(" << ino() << ") ";
}
/*
string path;
in.make_path_string(path, true);
- out << "[inode " << in.inode.ino;
+ out << "[inode " << in.ino();
out << " ["
<< (in.is_multiversion() ? "...":"")
<< in.first << "," << in.last << "]";
if (in.is_frozen_inode()) out << " FROZEN";
if (in.is_frozen_auth_pin()) out << " FROZEN_AUTHPIN";
- const CInode::mempool_inode *pi = in.get_projected_inode();
+ const auto& pi = in.get_projected_inode();
if (pi->is_truncating())
out << " truncating(" << pi->truncate_from << " to " << pi->truncate_size << ")";
- if (in.inode.is_dir()) {
- out << " " << in.inode.dirstat;
+ if (in.is_dir()) {
+ out << " " << in.get_inode()->dirstat;
if (g_conf()->mds_debug_scatterstat && in.is_projected()) {
- const CInode::mempool_inode *pi = in.get_projected_inode();
out << "->" << pi->dirstat;
}
} else {
- out << " s=" << in.inode.size;
- if (in.inode.nlink != 1)
- out << " nl=" << in.inode.nlink;
+ out << " s=" << in.get_inode()->size;
+ if (in.get_inode()->nlink != 1)
+ out << " nl=" << in.get_inode()->nlink;
}
// rstat
- out << " " << in.inode.rstat;
- if (!(in.inode.rstat == in.inode.accounted_rstat))
- out << "/" << in.inode.accounted_rstat;
+ out << " " << in.get_inode()->rstat;
+ if (!(in.get_inode()->rstat == in.get_inode()->accounted_rstat))
+ out << "/" << in.get_inode()->accounted_rstat;
if (g_conf()->mds_debug_scatterstat && in.is_projected()) {
- const CInode::mempool_inode *pi = in.get_projected_inode();
out << "->" << pi->rstat;
if (!(pi->rstat == pi->accounted_rstat))
out << "/" << pi->accounted_rstat;
}
+ if (in.is_any_old_inodes()) {
+ out << " old_inodes=" << in.get_old_inodes()->size();
+ }
+
if (!in.client_need_snapflush.empty())
out << " need_snapflush=" << in.client_need_snapflush;
-
// locks
if (!in.authlock.is_sync_and_unlocked())
out << " " << in.authlock;
if (!in.linklock.is_sync_and_unlocked())
out << " " << in.linklock;
- if (in.inode.is_dir()) {
+ if (in.get_inode()->is_dir()) {
if (!in.dirfragtreelock.is_sync_and_unlocked())
out << " " << in.dirfragtreelock;
if (!in.snaplock.is_sync_and_unlocked())
out << " " << in.versionlock;
// hack: spit out crap on which clients have caps
- if (in.inode.client_ranges.size())
- out << " cr=" << in.inode.client_ranges;
+ if (in.get_inode()->client_ranges.size())
+ out << " cr=" << in.get_inode()->client_ranges;
if (!in.get_client_caps().empty()) {
out << " caps={";
in.print_pin_set(out);
}
- if (in.inode.export_pin != MDS_RANK_NONE) {
- out << " export_pin=" << in.inode.export_pin;
+ if (in.get_inode()->export_pin != MDS_RANK_NONE) {
+ out << " export_pin=" << in.get_inode()->export_pin;
}
if (in.state_test(CInode::STATE_DISTEPHEMERALPIN)) {
out << " distepin";
return out;
}
-CInode::CInode(MDCache *c, bool auth, snapid_t f, snapid_t l)
- :
- mdcache(c),
- first(f), last(l),
- item_dirty(this),
+CInode::CInode(MDCache *c, bool auth, snapid_t f, snapid_t l) :
+ mdcache(c), first(f), last(l), item_dirty(this),
item_caps(this),
item_open_file(this),
item_dirty_parent(this),
flocklock(this, &flocklock_type),
policylock(this, &policylock_type)
{
- if (auth) state_set(STATE_AUTH);
+ if (auth)
+ state_set(STATE_AUTH);
}
void CInode::print(ostream& out)
// FIXME: this is non-optimal, as we'll block freezes/migrations for potentially
// long periods waiting for clients to flush their snaps.
- auth_pin(this); // pin head inode...
+ auth_pin(this); // pin head get_inode()->..
}
auto &clients = client_need_snapflush[snapid];
}
}
-CInode::projected_inode &CInode::project_inode(bool xattr, bool snap)
+CInode::projected_inode CInode::project_inode(bool xattr, bool snap)
{
- auto &pi = projected_nodes.empty() ?
- projected_nodes.emplace_back(inode) :
- projected_nodes.emplace_back(projected_nodes.back().inode);
+ auto pi = allocate_inode(*get_projected_inode());
if (scrub_infop && scrub_infop->last_scrub_dirty) {
- pi.inode.last_scrub_stamp = scrub_infop->last_scrub_stamp;
- pi.inode.last_scrub_version = scrub_infop->last_scrub_version;
+ pi->last_scrub_stamp = scrub_infop->last_scrub_stamp;
+ pi->last_scrub_version = scrub_infop->last_scrub_version;
scrub_infop->last_scrub_dirty = false;
scrub_maybe_delete_info();
}
+ const auto& ox = get_projected_xattrs();
+ xattr_map_ptr px;
if (xattr) {
- pi.xattrs.reset(new mempool_xattr_map(*get_projected_xattrs()));
- ++num_projected_xattrs;
+ px = allocate_xattr_map();
+ if (ox)
+ *px = *ox;
}
+ sr_t* ps = projected_inode::UNDEF_SRNODE;
if (snap) {
- project_snaprealm();
+ ps = prepare_new_srnode(0);
+ ++num_projected_srnodes;
}
- dout(15) << __func__ << " " << pi.inode.ino << dendl;
- return pi;
+ projected_nodes.emplace_back(pi, xattr ? px : ox , ps);
+
+ dout(15) << __func__ << " " << pi->ino << dendl;
+ return projected_inode(std::move(pi), std::move(px), ps);
}
void CInode::pop_and_dirty_projected_inode(LogSegment *ls)
{
ceph_assert(!projected_nodes.empty());
- auto& front = projected_nodes.front();
+ auto front = std::move(projected_nodes.front());
+ dout(15) << __func__ << " v" << front.inode->version << dendl;
- dout(15) << __func__ << " " << front.inode.ino
- << " v" << front.inode.version << dendl;
+ projected_nodes.pop_front();
+
+ bool pool_update = get_inode()->layout.pool_id != front.inode->layout.pool_id;
+ bool pin_update = get_inode()->export_pin != front.inode->export_pin;
+ bool dist_update = get_inode()->export_ephemeral_distributed_pin !=
+ front.inode->export_ephemeral_distributed_pin;
- int64_t old_pool = inode.layout.pool_id;
- bool pin_update = inode.export_pin != front.inode.export_pin;
- bool dist_update = inode.export_ephemeral_distributed_pin
- != front.inode.export_ephemeral_distributed_pin;
+ reset_inode(std::move(front.inode));
+ if (front.xattrs != get_xattrs())
+ reset_xattrs(std::move(front.xattrs));
- mark_dirty(front.inode.version, ls);
+ if (front.snapnode != projected_inode::UNDEF_SRNODE) {
+ --num_projected_srnodes;
+ pop_projected_snaprealm(front.snapnode, false);
+ }
- inode = std::move(front.inode);
+ mark_dirty(ls);
+ if (get_inode()->is_backtrace_updated())
+ mark_dirty_parent(ls, pool_update);
if (pin_update)
maybe_export_pin(true);
if (dist_update)
maybe_ephemeral_dist_children(true);
-
- if (inode.is_backtrace_updated())
- mark_dirty_parent(ls, old_pool != inode.layout.pool_id);
-
- if (front.xattrs) {
- --num_projected_xattrs;
- xattrs = *front.xattrs;
- }
-
- if (projected_nodes.front().snapnode != projected_inode::UNDEF_SRNODE) {
- pop_projected_snaprealm(projected_nodes.front().snapnode, false);
- --num_projected_srnodes;
- }
-
- projected_nodes.pop_front();
-}
-
-CInode::mempool_xattr_map *CInode::get_projected_xattrs()
-{
- if (num_projected_xattrs > 0) {
- for (auto it = projected_nodes.rbegin(); it != projected_nodes.rend(); ++it)
- if (it->xattrs)
- return it->xattrs.get();
- }
- return &xattrs;
-}
-
-CInode::mempool_xattr_map *CInode::get_previous_projected_xattrs()
-{
- if (num_projected_xattrs > 0) {
- for (auto it = ++projected_nodes.rbegin(); it != projected_nodes.rend(); ++it)
- if (it->xattrs)
- return it->xattrs.get();
- }
- return &xattrs;
}
sr_t *CInode::prepare_new_srnode(snapid_t snapid)
// dirfrags
+InodeStoreBase::inode_const_ptr InodeStoreBase::empty_inode = InodeStoreBase::allocate_inode();
+
__u32 InodeStoreBase::hash_dentry_name(std::string_view dn)
{
- int which = inode.dir_layout.dl_dir_hash;
+ int which = inode->dir_layout.dl_dir_hash;
if (!which)
which = CEPH_STR_HASH_LINUX;
ceph_assert(ceph_str_hash_valid(which));
void CInode::name_stray_dentry(string& dname)
{
char s[20];
- snprintf(s, sizeof(s), "%llx", (unsigned long long)inode.ino.val);
+ snprintf(s, sizeof(s), "%llx", (unsigned long long)ino().val);
dname = s;
}
CDentry* _cdentry = get_projected_parent_dn();
if (_cdentry) {
pv = _cdentry->pre_dirty(get_projected_version());
- dout(10) << "pre_dirty " << pv << " (current v " << inode.version << ")" << dendl;
+ dout(10) << "pre_dirty " << pv << " (current v " << get_inode()->version << ")" << dendl;
} else {
ceph_assert(is_base());
pv = get_projected_version() + 1;
}
// force update backtrace for old format inode (see mempool_inode::decode)
- if (inode.backtrace_version == 0 && !projected_nodes.empty()) {
- mempool_inode &pi = projected_nodes.back().inode;
- if (pi.backtrace_version == 0)
- pi.update_backtrace(pv);
+ if (get_inode()->backtrace_version == 0 && !projected_nodes.empty()) {
+ auto pi = _get_projected_inode();
+ if (pi->backtrace_version == 0)
+ pi->update_backtrace(pv);
}
return pv;
}
ls->dirty_inodes.push_back(&item_dirty);
}
-void CInode::mark_dirty(version_t pv, LogSegment *ls) {
+void CInode::mark_dirty(LogSegment *ls) {
dout(10) << __func__ << " " << *this << dendl;
ceph_assert(is_auth());
// touch my private version
- ceph_assert(inode.version < pv);
- inode.version = pv;
_mark_dirty(ls);
// mark dentry too
if (parent)
- parent->mark_dirty(pv, ls);
+ parent->mark_dirty(get_version(), ls);
}
void CInode::build_backtrace(int64_t pool, inode_backtrace_t& bt)
{
- bt.ino = inode.ino;
+ bt.ino = ino();
bt.ancestors.clear();
bt.pool = pool;
CDentry *pdn = get_parent_dn();
while (pdn) {
CInode *diri = pdn->get_dir()->get_inode();
- bt.ancestors.push_back(inode_backpointer_t(diri->ino(), pdn->get_name(), in->inode.version));
+ bt.ancestors.push_back(inode_backpointer_t(diri->ino(), pdn->get_name(), in->get_inode()->version));
in = diri;
pdn = in->get_parent_dn();
}
- for (auto &p : inode.old_pools) {
+ for (auto &p : get_inode()->old_pools) {
// don't add our own pool id to old_pools to avoid looping (e.g. setlayout 0, 1, 0)
if (p != pool)
bt.old_pools.insert(p);
op.setxattr("parent", parent_bl);
bufferlist layout_bl;
- encode(inode.layout, layout_bl, mdcache->mds->mdsmap->get_up_features());
+ encode(get_inode()->layout, layout_bl, mdcache->mds->mdsmap->get_up_features());
op.setxattr("layout", layout_bl);
SnapContext snapc;
object_t oid = get_object_name(ino(), frag_t(), "");
object_locator_t oloc(pool);
Context *fin2 = new C_OnFinisher(
- new C_IO_Inode_StoredBacktrace(this, inode.backtrace_version, fin),
+ new C_IO_Inode_StoredBacktrace(this, get_inode()->backtrace_version, fin),
mdcache->mds->finisher);
- if (!state_test(STATE_DIRTYPOOL) || inode.old_pools.empty()) {
+ if (!state_test(STATE_DIRTYPOOL) || get_inode()->old_pools.empty()) {
dout(20) << __func__ << ": no dirtypool or no old pools" << dendl;
mdcache->mds->objecter->mutate(oid, oloc, op, snapc,
ceph::real_clock::now(),
// In the case where DIRTYPOOL is set, we update all old pools backtraces
// such that anyone reading them will see the new pool ID in
// inode_backtrace_t::pool and go read everything else from there.
- for (const auto &p : inode.old_pools) {
+ for (const auto &p : get_inode()->old_pools) {
if (p == pool)
continue;
dout(10) << __func__ << " v " << v << dendl;
auth_unpin(this);
- if (v == inode.backtrace_version)
+ if (v == get_inode()->backtrace_version)
clear_dirty_parent();
if (fin)
fin->complete(0);
void CInode::fetch_backtrace(Context *fin, bufferlist *backtrace)
{
- mdcache->fetch_backtrace(inode.ino, get_backtrace_pool(), *backtrace, fin);
+ mdcache->fetch_backtrace(ino(), get_backtrace_pool(), *backtrace, fin);
}
void CInode::mark_dirty_parent(LogSegment *ls, bool dirty_pool)
// parent dir
+void InodeStoreBase::encode_xattrs(bufferlist &bl) const {
+ using ceph::encode;
+ if (xattrs)
+ encode(*xattrs, bl);
+ else
+ encode((__u32)0, bl);
+}
+
+void InodeStoreBase::decode_xattrs(bufferlist::const_iterator &p) {
+ using ceph::decode;
+ mempool_xattr_map tmp;
+ decode_noshare(tmp, p);
+ if (tmp.empty()) {
+ reset_xattrs(xattr_map_ptr());
+ } else {
+ reset_xattrs(allocate_xattr_map(std::move(tmp)));
+ }
+}
+
+void InodeStoreBase::encode_old_inodes(bufferlist &bl, uint64_t features) const {
+ using ceph::encode;
+ if (old_inodes)
+ encode(*old_inodes, bl, features);
+ else
+ encode((__u32)0, bl);
+}
+
+void InodeStoreBase::decode_old_inodes(bufferlist::const_iterator &p) {
+ using ceph::decode;
+ mempool_old_inode_map tmp;
+ decode(tmp, p);
+ if (tmp.empty()) {
+ reset_old_inodes(old_inode_map_ptr());
+ } else {
+ reset_old_inodes(allocate_old_inode_map(std::move(tmp)));
+ }
+}
+
void InodeStoreBase::encode_bare(bufferlist &bl, uint64_t features,
const bufferlist *snap_blob) const
{
using ceph::encode;
- encode(inode, bl, features);
- if (is_symlink())
+ encode(*inode, bl, features);
+ if (inode->is_symlink())
encode(symlink, bl);
encode(dirfragtree, bl);
- encode(xattrs, bl);
+ encode_xattrs(bl);
+
if (snap_blob)
encode(*snap_blob, bl);
else
encode(bufferlist(), bl);
- encode(old_inodes, bl, features);
+ encode_old_inodes(bl, features);
encode(oldest_snap, bl);
encode(damage_flags, bl);
}
bufferlist& snap_blob, __u8 struct_v)
{
using ceph::decode;
- decode(inode, bl);
- if (is_symlink()) {
+
+ auto _inode = allocate_inode();
+ decode(*_inode, bl);
+
+ if (_inode->is_symlink()) {
std::string tmp;
decode(tmp, bl);
symlink = std::string_view(tmp);
}
decode(dirfragtree, bl);
- decode_noshare(xattrs, bl);
+ decode_xattrs(bl);
decode(snap_blob, bl);
- decode(old_inodes, bl);
- if (struct_v == 2 && inode.is_dir()) {
+ decode_old_inodes(bl);
+ if (struct_v == 2 && _inode->is_dir()) {
bool default_layout_exists;
decode(default_layout_exists, bl);
if (default_layout_exists) {
decode(struct_v, bl); // this was a default_file_layout
- decode(inode.layout, bl); // but we only care about the layout portion
+ decode(_inode->layout, bl); // but we only care about the layout portion
}
}
decode(damage_flags, bl);
}
}
+
+ reset_inode(std::move(_inode));
}
void CInode::encode_lock_iauth(bufferlist& bl)
{
ENCODE_START(1, 1, bl);
- encode(inode.version, bl);
- encode(inode.ctime, bl);
- encode(inode.mode, bl);
- encode(inode.uid, bl);
- encode(inode.gid, bl);
+ encode(get_inode()->version, bl);
+ encode(get_inode()->ctime, bl);
+ encode(get_inode()->mode, bl);
+ encode(get_inode()->uid, bl);
+ encode(get_inode()->gid, bl);
ENCODE_FINISH(bl);
}
void CInode::decode_lock_iauth(bufferlist::const_iterator& p)
{
+ ceph_assert(!is_auth());
+ auto _inode = allocate_inode(*get_inode());
DECODE_START(1, p);
- decode(inode.version, p);
+ decode(_inode->version, p);
utime_t tm;
decode(tm, p);
- if (inode.ctime < tm) inode.ctime = tm;
- decode(inode.mode, p);
- decode(inode.uid, p);
- decode(inode.gid, p);
+ if (_inode->ctime < tm) _inode->ctime = tm;
+ decode(_inode->mode, p);
+ decode(_inode->uid, p);
+ decode(_inode->gid, p);
DECODE_FINISH(p);
+ reset_inode(std::move(_inode));
}
void CInode::encode_lock_ilink(bufferlist& bl)
{
ENCODE_START(1, 1, bl);
- encode(inode.version, bl);
- encode(inode.ctime, bl);
- encode(inode.nlink, bl);
+ encode(get_inode()->version, bl);
+ encode(get_inode()->ctime, bl);
+ encode(get_inode()->nlink, bl);
ENCODE_FINISH(bl);
}
void CInode::decode_lock_ilink(bufferlist::const_iterator& p)
{
+ ceph_assert(!is_auth());
+ auto _inode = allocate_inode(*get_inode());
DECODE_START(1, p);
- decode(inode.version, p);
+ decode(_inode->version, p);
utime_t tm;
decode(tm, p);
- if (inode.ctime < tm) inode.ctime = tm;
- decode(inode.nlink, p);
+ if (_inode->ctime < tm) _inode->ctime = tm;
+ decode(_inode->nlink, p);
DECODE_FINISH(p);
+ reset_inode(std::move(_inode));
}
void CInode::encode_lock_idft(bufferlist& bl)
{
ENCODE_START(1, 1, bl);
if (is_auth()) {
- encode(inode.version, bl);
+ encode(get_inode()->version, bl);
} else {
// treat flushing as dirty when rejoining cache
bool dirty = dirfragtreelock.is_dirty_or_flushing();
void CInode::decode_lock_idft(bufferlist::const_iterator& p)
{
+ inode_ptr _inode;
+
DECODE_START(1, p);
if (is_auth()) {
bool replica_dirty;
dirfragtreelock.mark_dirty(); // ok bc we're auth and caller will handle
}
} else {
- decode(inode.version, p);
+ _inode = allocate_inode(*get_inode());
+ decode(_inode->version, p);
}
{
fragtree_t temp;
verify_dirfrags();
}
DECODE_FINISH(p);
+
+ if (_inode)
+ reset_inode(std::move(_inode));
}
void CInode::encode_lock_ifile(bufferlist& bl)
{
ENCODE_START(1, 1, bl);
if (is_auth()) {
- encode(inode.version, bl);
- encode(inode.ctime, bl);
- encode(inode.mtime, bl);
- encode(inode.atime, bl);
- encode(inode.time_warp_seq, bl);
+ encode(get_inode()->version, bl);
+ encode(get_inode()->ctime, bl);
+ encode(get_inode()->mtime, bl);
+ encode(get_inode()->atime, bl);
+ encode(get_inode()->time_warp_seq, bl);
if (!is_dir()) {
- encode(inode.layout, bl, mdcache->mds->mdsmap->get_up_features());
- encode(inode.size, bl);
- encode(inode.truncate_seq, bl);
- encode(inode.truncate_size, bl);
- encode(inode.client_ranges, bl);
- encode(inode.inline_data, bl);
+ encode(get_inode()->layout, bl, mdcache->mds->mdsmap->get_up_features());
+ encode(get_inode()->size, bl);
+ encode(get_inode()->truncate_seq, bl);
+ encode(get_inode()->truncate_size, bl);
+ encode(get_inode()->client_ranges, bl);
+ encode(get_inode()->inline_data, bl);
}
} else {
// treat flushing as dirty when rejoining cache
bool dirty = filelock.is_dirty_or_flushing();
encode(dirty, bl);
}
- dout(15) << __func__ << " inode.dirstat is " << inode.dirstat << dendl;
- encode(inode.dirstat, bl); // only meaningful if i am auth.
+ dout(15) << __func__ << " inode.dirstat is " << get_inode()->dirstat << dendl;
+ encode(get_inode()->dirstat, bl); // only meaningful if i am auth.
bufferlist tmp;
__u32 n = 0;
for (const auto &p : dirfrags) {
void CInode::decode_lock_ifile(bufferlist::const_iterator& p)
{
+ inode_ptr _inode;
+
DECODE_START(1, p);
if (!is_auth()) {
- decode(inode.version, p);
+ _inode = allocate_inode(*get_inode());
+
+ decode(_inode->version, p);
utime_t tm;
decode(tm, p);
- if (inode.ctime < tm) inode.ctime = tm;
- decode(inode.mtime, p);
- decode(inode.atime, p);
- decode(inode.time_warp_seq, p);
+ if (_inode->ctime < tm) _inode->ctime = tm;
+ decode(_inode->mtime, p);
+ decode(_inode->atime, p);
+ decode(_inode->time_warp_seq, p);
if (!is_dir()) {
- decode(inode.layout, p);
- decode(inode.size, p);
- decode(inode.truncate_seq, p);
- decode(inode.truncate_size, p);
- decode(inode.client_ranges, p);
- decode(inode.inline_data, p);
+ decode(_inode->layout, p);
+ decode(_inode->size, p);
+ decode(_inode->truncate_seq, p);
+ decode(_inode->truncate_size, p);
+ decode(_inode->client_ranges, p);
+ decode(_inode->inline_data, p);
}
} else {
bool replica_dirty;
decode(dirstat, p);
if (!is_auth()) {
dout(10) << " taking inode dirstat " << dirstat << " for " << *this << dendl;
- inode.dirstat = dirstat; // take inode summation if replica
+ _inode->dirstat = dirstat; // take inode summation if replica
}
__u32 n;
decode(n, p);
dir->first = fgfirst;
fnode_t *pf = dir->get_projected_fnode();
finish_scatter_update(&filelock, dir,
- inode.dirstat.version, pf->accounted_fragstat.version);
+ _inode->dirstat.version, pf->accounted_fragstat.version);
}
}
}
DECODE_FINISH(p);
+
+ if (_inode)
+ reset_inode(std::move(_inode));
}
void CInode::encode_lock_inest(bufferlist& bl)
{
ENCODE_START(1, 1, bl);
if (is_auth()) {
- encode(inode.version, bl);
+ encode(get_inode()->version, bl);
} else {
// treat flushing as dirty when rejoining cache
bool dirty = nestlock.is_dirty_or_flushing();
encode(dirty, bl);
}
- dout(15) << __func__ << " inode.rstat is " << inode.rstat << dendl;
- encode(inode.rstat, bl); // only meaningful if i am auth.
+ dout(15) << __func__ << " inode.rstat is " << get_inode()->rstat << dendl;
+ encode(get_inode()->rstat, bl); // only meaningful if i am auth.
bufferlist tmp;
__u32 n = 0;
for (const auto &p : dirfrags) {
void CInode::decode_lock_inest(bufferlist::const_iterator& p)
{
+ inode_ptr _inode;
+
DECODE_START(1, p);
if (is_auth()) {
bool replica_dirty;
nestlock.mark_dirty(); // ok bc we're auth and caller will handle
}
} else {
- decode(inode.version, p);
+ _inode = allocate_inode(*get_inode());
+ decode(_inode->version, p);
}
nest_info_t rstat;
decode(rstat, p);
if (!is_auth()) {
dout(10) << __func__ << " taking inode rstat " << rstat << " for " << *this << dendl;
- inode.rstat = rstat; // take inode summation if replica
+ _inode->rstat = rstat; // take inode summation if replica
}
__u32 n;
decode(n, p);
dir->first = fgfirst;
fnode_t *pf = dir->get_projected_fnode();
finish_scatter_update(&nestlock, dir,
- inode.rstat.version, pf->accounted_rstat.version);
+ _inode->rstat.version, pf->accounted_rstat.version);
}
}
}
DECODE_FINISH(p);
+
+ if (_inode)
+ reset_inode(std::move(_inode));
}
void CInode::encode_lock_ixattr(bufferlist& bl)
{
ENCODE_START(1, 1, bl);
- encode(inode.version, bl);
- encode(inode.ctime, bl);
- encode(xattrs, bl);
+ encode(get_inode()->version, bl);
+ encode(get_inode()->ctime, bl);
+ encode_xattrs(bl);
ENCODE_FINISH(bl);
}
void CInode::decode_lock_ixattr(bufferlist::const_iterator& p)
{
+ ceph_assert(!is_auth());
+ auto _inode = allocate_inode(*get_inode());
DECODE_START(1, p);
- decode(inode.version, p);
+ decode(_inode->version, p);
utime_t tm;
decode(tm, p);
- if (inode.ctime < tm) inode.ctime = tm;
- decode_noshare(xattrs, p);
+ if (_inode->ctime < tm)
+ _inode->ctime = tm;
+ decode_xattrs(p);
DECODE_FINISH(p);
+ reset_inode(std::move(_inode));
}
void CInode::encode_lock_isnap(bufferlist& bl)
{
ENCODE_START(1, 1, bl);
- encode(inode.version, bl);
- encode(inode.ctime, bl);
+ encode(get_inode()->version, bl);
+ encode(get_inode()->ctime, bl);
encode_snap(bl);
ENCODE_FINISH(bl);
}
void CInode::decode_lock_isnap(bufferlist::const_iterator& p)
{
+ ceph_assert(!is_auth());
+ auto _inode = allocate_inode(*get_inode());
DECODE_START(1, p);
- decode(inode.version, p);
+ decode(_inode->version, p);
utime_t tm;
decode(tm, p);
- if (inode.ctime < tm) inode.ctime = tm;
+ if (_inode->ctime < tm) _inode->ctime = tm;
decode_snap(p);
DECODE_FINISH(p);
+ reset_inode(std::move(_inode));
}
void CInode::encode_lock_iflock(bufferlist& bl)
{
ENCODE_START(1, 1, bl);
- encode(inode.version, bl);
+ encode(get_inode()->version, bl);
_encode_file_locks(bl);
ENCODE_FINISH(bl);
}
void CInode::decode_lock_iflock(bufferlist::const_iterator& p)
{
+ ceph_assert(!is_auth());
+ auto _inode = allocate_inode(*get_inode());
DECODE_START(1, p);
- decode(inode.version, p);
+ decode(_inode->version, p);
_decode_file_locks(p);
DECODE_FINISH(p);
+ reset_inode(std::move(_inode));
}
void CInode::encode_lock_ipolicy(bufferlist& bl)
{
ENCODE_START(2, 1, bl);
- if (inode.is_dir()) {
- encode(inode.version, bl);
- encode(inode.ctime, bl);
- encode(inode.layout, bl, mdcache->mds->mdsmap->get_up_features());
- encode(inode.quota, bl);
- encode(inode.export_pin, bl);
- encode(inode.export_ephemeral_distributed_pin, bl);
- encode(inode.export_ephemeral_random_pin, bl);
+ if (is_dir()) {
+ encode(get_inode()->version, bl);
+ encode(get_inode()->ctime, bl);
+ encode(get_inode()->layout, bl, mdcache->mds->mdsmap->get_up_features());
+ encode(get_inode()->quota, bl);
+ encode(get_inode()->export_pin, bl);
+ encode(get_inode()->export_ephemeral_distributed_pin, bl);
+ encode(get_inode()->export_ephemeral_random_pin, bl);
}
ENCODE_FINISH(bl);
}
void CInode::decode_lock_ipolicy(bufferlist::const_iterator& p)
{
- DECODE_START(2, p);
- if (inode.is_dir()) {
- decode(inode.version, p);
+ ceph_assert(!is_auth());
+ auto _inode = allocate_inode(*get_inode());
+ DECODE_START(1, p);
+ if (is_dir()) {
+ decode(_inode->version, p);
utime_t tm;
decode(tm, p);
- if (inode.ctime < tm) inode.ctime = tm;
- decode(inode.layout, p);
- decode(inode.quota, p);
- {
- mds_rank_t old_pin = inode.export_pin;
- decode(inode.export_pin, p);
- maybe_export_pin(old_pin != inode.export_pin);
- }
+ if (_inode->ctime < tm)
+ _inode->ctime = tm;
+ decode(_inode->layout, p);
+ decode(_inode->quota, p);
+ decode(_inode->export_pin, p);
if (struct_v >= 2) {
- {
- bool old_ephemeral_pin = inode.export_ephemeral_distributed_pin;
- decode(inode.export_ephemeral_distributed_pin, p);
- maybe_ephemeral_dist_children(old_ephemeral_pin != inode.export_ephemeral_distributed_pin);
- }
- decode(inode.export_ephemeral_random_pin, p);
+ decode(_inode->export_ephemeral_distributed_pin, p);
+ decode(_inode->export_ephemeral_random_pin, p);
}
}
DECODE_FINISH(p);
+ mds_rank_t old_export_pin = get_inode()->export_pin;
+ bool old_ephemeral_pin = get_inode()->export_ephemeral_distributed_pin;
+ reset_inode(std::move(_inode));
+ maybe_export_pin(old_export_pin != get_inode()->export_pin);
+ maybe_ephemeral_dist_children(old_ephemeral_pin != get_inode()->export_ephemeral_distributed_pin);
}
void CInode::encode_lock_state(int type, bufferlist& bl)
{
dout(10) << __func__ << " " << *lock << " on " << *this << dendl;
ceph_assert(is_auth());
- mempool_inode *pi = get_projected_inode();
+ const auto& pi = get_projected_inode();
for (const auto &p : dirfrags) {
frag_t fg = p.first;
MutationRef mut(new MutationImpl());
mut->ls = mdlog->get_current_segment();
- mempool_inode *pi = get_projected_inode();
fnode_t *pf = dir->project_fnode();
std::string_view ename;
switch (lock->get_type()) {
case CEPH_LOCK_IFILE:
- pf->fragstat.version = pi->dirstat.version;
+ pf->fragstat.version = inode_version;
pf->accounted_fragstat = pf->fragstat;
ename = "lock ifile accounted scatter stat update";
break;
case CEPH_LOCK_INEST:
- pf->rstat.version = pi->rstat.version;
+ pf->rstat.version = inode_version;
pf->accounted_rstat = pf->rstat;
ename = "lock inest accounted scatter stat update";
// adjust summation
ceph_assert(is_auth());
- mempool_inode *pi = get_projected_inode();
+ auto pi = _get_projected_inode();
bool touched_mtime = false, touched_chattr = false;
dout(20) << " orig dirstat " << pi->dirstat << dendl;
if (const sr_t *srnode = get_projected_srnode(); srnode)
rstat.rsnaps = srnode->snaps.size();
- mempool_inode *pi = get_projected_inode();
+ auto pi = _get_projected_inode();
dout(20) << " orig rstat " << pi->rstat << dendl;
pi->rstat.version++;
for (const auto &p : dirfrags) {
snapid_t CInode::get_oldest_snap()
{
snapid_t t = first;
- if (!old_inodes.empty())
- t = old_inodes.begin()->second.first;
+ if (is_any_old_inodes())
+ t = get_old_inodes()->begin()->second.first;
return std::min(t, oldest_snap);
}
-CInode::mempool_old_inode& CInode::cow_old_inode(snapid_t follows, bool cow_head)
+const CInode::mempool_old_inode& CInode::cow_old_inode(snapid_t follows, bool cow_head)
{
ceph_assert(follows >= first);
- mempool_inode *pi = cow_head ? get_projected_inode() : get_previous_projected_inode();
- mempool_xattr_map *px = cow_head ? get_projected_xattrs() : get_previous_projected_xattrs();
+ const auto& pi = cow_head ? get_projected_inode() : get_previous_projected_inode();
+ const auto& px = cow_head ? get_projected_xattrs() : get_previous_projected_xattrs();
+
+ auto _old_inodes = allocate_old_inode_map();
+ if (old_inodes)
+ *_old_inodes = *old_inodes;
- mempool_old_inode &old = old_inodes[follows];
+ mempool_old_inode &old = (*_old_inodes)[follows];
old.first = first;
old.inode = *pi;
- old.xattrs = *px;
+ if (px) {
+ dout(10) << " " << px->size() << " xattrs cowed, " << *px << dendl;
+ old.xattrs = *px;
+ }
if (first < oldest_snap)
oldest_snap = first;
-
- dout(10) << " " << px->size() << " xattrs cowed, " << *px << dendl;
old.inode.trim_client_ranges(follows);
<< " to [" << old.first << "," << follows << "] on "
<< *this << dendl;
+ reset_old_inodes(std::move(_old_inodes));
return old;
}
-void CInode::split_old_inode(snapid_t snap)
-{
- auto it = old_inodes.lower_bound(snap);
- ceph_assert(it != old_inodes.end() && it->second.first < snap);
-
- mempool_old_inode &old = old_inodes[snap - 1];
- old = it->second;
-
- it->second.first = snap;
- dout(10) << __func__ << " " << "[" << old.first << "," << it->first
- << "] to [" << snap << "," << it->first << "] on " << *this << dendl;
-}
-
void CInode::pre_cow_old_inode()
{
snapid_t follows = mdcache->get_global_snaprealm()->get_newest_seq();
bool CInode::has_snap_data(snapid_t snapid)
{
bool found = snapid >= first && snapid <= last;
- if (!found && is_multiversion()) {
- auto p = old_inodes.lower_bound(snapid);
- if (p != old_inodes.end()) {
+ if (!found && is_any_old_inodes()) {
+ auto p = old_inodes->lower_bound(snapid);
+ if (p != old_inodes->end()) {
if (p->second.first > snapid) {
- if (p != old_inodes.begin())
+ if (p != old_inodes->begin())
--p;
}
if (p->second.first <= snapid && snapid <= p->first) {
{
dout(10) << __func__ << " " << snaps << dendl;
- for (auto it = old_inodes.begin(); it != old_inodes.end(); ) {
- const snapid_t &id = it->first;
- const auto &s = snaps.lower_bound(it->second.first);
+ if (!get_old_inodes())
+ return;
+
+ std::vector<snapid_t> to_remove;
+ for (auto p : *get_old_inodes()) {
+ const snapid_t &id = p.first;
+ const auto &s = snaps.lower_bound(p.second.first);
if (s == snaps.end() || *s > id) {
- dout(10) << " purging old_inode [" << it->second.first << "," << id << "]" << dendl;
- it = old_inodes.erase(it);
- } else {
- ++it;
+ dout(10) << " purging old_inode [" << p.second.first << "," << id << "]" << dendl;
+ to_remove.push_back(id);
}
}
+
+ if (to_remove.size() == get_old_inodes()->size()) {
+ reset_old_inodes(old_inode_map_ptr());
+ } else if (!to_remove.empty()) {
+ auto _old_inodes = allocate_old_inode_map(*get_old_inodes());
+ for (auto id : to_remove)
+ _old_inodes->erase(id);
+ reset_old_inodes(std::move(_old_inodes));
+ }
}
/*
* pick/create an old_inode
*/
-CInode::mempool_old_inode * CInode::pick_old_inode(snapid_t snap)
+snapid_t CInode::pick_old_inode(snapid_t snap) const
{
- auto it = old_inodes.lower_bound(snap); // p is first key >= to snap
- if (it != old_inodes.end() && it->second.first <= snap) {
- dout(10) << __func__ << " snap " << snap << " -> [" << it->second.first << "," << it->first << "]" << dendl;
- return &it->second;
+ if (is_any_old_inodes()) {
+ auto it = old_inodes->lower_bound(snap); // p is first key >= to snap
+ if (it != old_inodes->end() && it->second.first <= snap) {
+ dout(10) << __func__ << " snap " << snap << " -> [" << it->second.first << "," << it->first << "]" << dendl;
+ return it->first;
+ }
}
dout(10) << __func__ << " snap " << snap << " -> nothing" << dendl;
- return NULL;
+ return 0;
}
void CInode::open_snaprealm(bool nosplit)
}
int CInode::get_caps_allowed_for_client(Session *session, Capability *cap,
- mempool_inode *file_i) const
+ const mempool_inode *file_i) const
{
client_t client = session->get_client();
int allowed;
bool valid = true;
// pick a version!
- mempool_inode *oi = &inode;
- mempool_inode *pi = get_projected_inode();
+ const mempool_inode *oi = get_inode().get();
+ const mempool_inode *pi = get_projected_inode().get();
- CInode::mempool_xattr_map *pxattrs = nullptr;
+ const mempool_xattr_map *pxattrs = nullptr;
if (snapid != CEPH_NOSNAP) {
if (!is_auth())
valid = false;
- if (is_multiversion()) {
- auto it = old_inodes.lower_bound(snapid);
- if (it != old_inodes.end()) {
+ if (is_any_old_inodes()) {
+ auto it = old_inodes->lower_bound(snapid);
+ if (it != old_inodes->end()) {
if (it->second.first > snapid) {
- if (it != old_inodes.begin())
+ if (it != old_inodes->begin())
--it;
}
if (it->second.first <= snapid && snapid <= it->first) {
<< " to old_inode [" << it->second.first << "," << it->first << "]"
<< " " << it->second.inode.rstat
<< dendl;
- auto &p = it->second;
- pi = oi = &p.inode;
- pxattrs = &p.xattrs;
+ pi = oi = &it->second.inode;
+ pxattrs = &it->second.xattrs;
} else {
// snapshoted remote dentry can result this
dout(0) << __func__ << " old_inode for snapid " << snapid
bool plocal = versionlock.get_last_wrlock_client() == client;
bool ppolicy = policylock.is_xlocked_by_client(client) || get_loner()==client;
- mempool_inode *any_i = (pfile|pauth|plink|pxattr|plocal) ? pi : oi;
+ const mempool_inode *any_i = (pfile|pauth|plink|pxattr|plocal) ? pi : oi;
dout(20) << " pfile " << pfile << " pauth " << pauth
<< " plink " << plink << " pxattr " << pxattr
<< " valid=" << valid << dendl;
// file
- mempool_inode *file_i = pfile ? pi:oi;
+ const mempool_inode *file_i = pfile ? pi:oi;
file_layout_t layout;
if (is_dir()) {
layout = (ppolicy ? pi : oi)->layout;
}
// max_size is min of projected, actual
- uint64_t max_size =
- std::min(oi->client_ranges.count(client) ?
- oi->client_ranges[client].range.last : 0,
- pi->client_ranges.count(client) ?
- pi->client_ranges[client].range.last : 0);
+ uint64_t max_size;
+ {
+ auto it = oi->client_ranges.find(client);
+ if (it == oi->client_ranges.end()) {
+ max_size = 0;
+ } else {
+ max_size = it->second.range.last;
+ if (oi != pi) {
+ it = pi->client_ranges.find(client);
+ if (it == pi->client_ranges.end()) {
+ max_size = 0;
+ } else {
+ max_size = std::min(max_size, it->second.range.last);
+ }
+ }
+ }
+ }
// inline data
version_t inline_version = 0;
(getattr_caps & CEPH_CAP_FILE_RD)) { // client requests inline data
inline_version = file_i->inline_data.version;
if (file_i->inline_data.length() > 0)
- inline_data = file_i->inline_data.get_data();
+ file_i->inline_data.get_data(inline_data);
}
// nest (do same as file... :/)
}
// auth
- mempool_inode *auth_i = pauth ? pi:oi;
+ const mempool_inode *auth_i = pauth ? pi:oi;
// link
- mempool_inode *link_i = plink ? pi:oi;
+ const mempool_inode *link_i = plink ? pi:oi;
// xattr
- mempool_inode *xattr_i = pxattr ? pi:oi;
+ const mempool_inode *xattr_i = pxattr ? pi:oi;
using ceph::encode;
// xattr
(cap && cap->client_xattr_version < xattr_i->xattr_version) ||
(getattr_caps & CEPH_CAP_XATTR_SHARED)) { // client requests xattrs
if (!pxattrs)
- pxattrs = pxattr ? get_projected_xattrs() : &xattrs;
+ pxattrs = pxattr ? get_projected_xattrs().get() : get_xattrs().get();
xattr_version = xattr_i->xattr_version;
} else {
xattr_version = 0;
encode_xattrs();
encode(inline_version, bl);
encode(inline_data, bl);
- mempool_inode *policy_i = ppolicy ? pi : oi;
+ const mempool_inode *policy_i = ppolicy ? pi : oi;
encode(policy_i->quota, bl);
encode(layout.pool_ns, bl);
encode(any_i->btime, bl);
encode(inline_data, bl);
}
if (conn->has_feature(CEPH_FEATURE_MDS_QUOTA)) {
- mempool_inode *policy_i = ppolicy ? pi : oi;
+ const mempool_inode *policy_i = ppolicy ? pi : oi;
encode(policy_i->quota, bl);
}
if (conn->has_feature(CEPH_FEATURE_FS_FILE_LAYOUT_V2)) {
bool plink = linklock.is_xlocked_by_client(client);
bool pxattr = xattrlock.is_xlocked_by_client(client);
- mempool_inode *oi = &inode;
- mempool_inode *pi = get_projected_inode();
- mempool_inode *i = (pfile|pauth|plink|pxattr) ? pi : oi;
+ const mempool_inode *oi = get_inode().get();
+ const mempool_inode *pi = get_projected_inode().get();
+ const mempool_inode *i = (pfile|pauth|plink|pxattr) ? pi : oi;
dout(20) << __func__ << " pfile " << pfile
<< " pauth " << pauth << " plink " << plink << " pxattr " << pxattr
if (cap->client_inline_version < i->inline_data.version) {
m->inline_version = cap->client_inline_version = i->inline_data.version;
if (i->inline_data.length() > 0)
- m->inline_data = i->inline_data.get_data();
+ i->inline_data.get_data(m->inline_data);
} else {
m->inline_version = 0;
}
// max_size is min of projected, actual.
- uint64_t oldms = oi->client_ranges.count(client) ? oi->client_ranges[client].range.last : 0;
- uint64_t newms = pi->client_ranges.count(client) ? pi->client_ranges[client].range.last : 0;
- m->max_size = std::min(oldms, newms);
+ {
+ uint64_t max_size;
+ auto it = oi->client_ranges.find(client);
+ if (it == oi->client_ranges.end()) {
+ max_size = 0;
+ } else {
+ max_size = it->second.range.last;
+ if (oi != pi) {
+ it = pi->client_ranges.find(client);
+ if (it == pi->client_ranges.end()) {
+ max_size = 0;
+ } else {
+ max_size = std::min(max_size, it->second.range.last);
+ }
+ }
+ }
+ m->max_size = max_size;
+ }
i = pauth ? pi:oi;
m->head.mode = i->mode;
using ceph::encode;
i = pxattr ? pi:oi;
- auto ix = pxattr ? get_projected_xattrs() : &xattrs;
+ const auto& ix = pxattr ? get_projected_xattrs() : get_xattrs();
if ((cap->pending() & CEPH_CAP_XATTR_SHARED) &&
i->xattr_version > cap->client_xattr_version) {
dout(10) << " including xattrs v " << i->xattr_version << dendl;
- encode(*ix, m->xattrbl);
+ if (ix)
+ encode(*ix, m->xattrbl);
+ else
+ encode((__u32)0, m->xattrbl);
m->head.xattr_version = i->xattr_version;
cap->client_xattr_version = i->xattr_version;
}
{
ENCODE_START(1, 1, bl);
encode(first, bl);
- encode(inode, bl, features);
+ encode(*get_inode(), bl, features);
encode(symlink, bl);
encode(dirfragtree, bl);
- encode(xattrs, bl);
- encode(old_inodes, bl, features);
+ encode_xattrs(bl);
+ encode_old_inodes(bl, features);
encode(damage_flags, bl);
encode_snap(bl);
ENCODE_FINISH(bl);
{
DECODE_START(1, p);
decode(first, p);
- decode(inode, p);
+ {
+ auto _inode = allocate_inode();
+ decode(*_inode, p);
+ reset_inode(std::move(_inode));
+ }
{
std::string tmp;
decode(tmp, p);
symlink = std::string_view(tmp);
}
decode(dirfragtree, p);
- decode_noshare(xattrs, p);
- decode(old_inodes, p);
+ decode_xattrs(p);
+ decode_old_inodes(p);
decode(damage_flags, p);
decode_snap(p);
DECODE_FINISH(p);
// include scatterlock info for any bounding CDirs
bufferlist bounding;
- if (inode.is_dir())
+ if (get_inode()->is_dir())
for (const auto &p : dirfrags) {
CDir *dir = p.second;
if (dir->state_test(CDir::STATE_EXPORTBOUND)) {
void InodeStoreBase::dump(Formatter *f) const
{
- inode.dump(f);
+ inode->dump(f);
f->dump_string("symlink", symlink);
f->open_array_section("xattrs");
- for (const auto& [key, val] : xattrs) {
- f->open_object_section("xattr");
- f->dump_string("key", key);
- std::string v(val.c_str(), val.length());
- f->dump_string("val", v);
- f->close_section();
+ if (xattrs) {
+ for (const auto& [key, val] : *xattrs) {
+ f->open_object_section("xattr");
+ f->dump_string("key", key);
+ std::string v(val.c_str(), val.length());
+ f->dump_string("val", v);
+ f->close_section();
+ }
}
f->close_section();
f->open_object_section("dirfragtree");
f->close_section(); // dirfragtree
f->open_array_section("old_inodes");
- for (const auto &p : old_inodes) {
- f->open_object_section("old_inode");
- // The key is the last snapid, the first is in the mempool_old_inode
- f->dump_int("last", p.first);
- p.second.dump(f);
- f->close_section(); // old_inode
+ if (old_inodes) {
+ for (const auto &p : *old_inodes) {
+ f->open_object_section("old_inode");
+ // The key is the last snapid, the first is in the mempool_old_inode
+ f->dump_int("last", p.first);
+ p.second.dump(f);
+ f->close_section(); // old_inode
+ }
}
f->close_section(); // old_inodes
t = mempool::mds_co::string(std::string_view(obj->get_data()));
}
-void InodeStoreBase::decode_json(JSONObj *obj){
+void InodeStoreBase::decode_json(JSONObj *obj)
+{
+ {
+ auto _inode = allocate_inode();
+ _inode->decode_json(obj);
+ reset_inode(std::move(_inode));
+ }
- inode.decode_json(obj);
JSONDecoder::decode_json("symlink", symlink, obj, true);
// JSONDecoder::decode_json("dirfragtree", dirfragtree, obj, true); // cann't decode it now
- JSONDecoder::decode_json("xattrs", InodeStoreBase::xattrs, xattrs_cb, obj, true);
+ //
+ //
+ {
+ mempool_xattr_map tmp;
+ JSONDecoder::decode_json("xattrs", tmp, xattrs_cb, obj, true);
+ if (tmp.empty())
+ reset_xattrs(xattr_map_ptr());
+ else
+ reset_xattrs(allocate_xattr_map(std::move(tmp)));
+ }
// JSONDecoder::decode_json("old_inodes", old_inodes, InodeStoreBase::old_indoes_cb, obj, true); // cann't decode old_inodes now
JSONDecoder::decode_json("oldest_snap", oldest_snap.val, obj, true);
JSONDecoder::decode_json("damage_flags", damage_flags, obj, true);
void InodeStore::generate_test_instances(std::list<InodeStore*> &ls)
{
InodeStore *populated = new InodeStore;
- populated->inode.ino = 0xdeadbeef;
+ populated->get_inode()->ino = 0xdeadbeef;
populated->symlink = "rhubarb";
ls.push_back(populated);
}
void InodeStoreBare::generate_test_instances(std::list<InodeStoreBare*> &ls)
{
InodeStoreBare *populated = new InodeStoreBare;
- populated->inode.ino = 0xdeadbeef;
+ populated->get_inode()->ino = 0xdeadbeef;
populated->symlink = "rhubarb";
ls.push_back(populated);
}
bool _start(int rval) {
if (in->is_dirty()) {
- MDCache *mdcache = in->mdcache;
- mempool_inode& inode = in->inode;
+ MDCache *mdcache = in->mdcache; // For the benefit of dout
+ auto ino = [this]() { return in->ino(); }; // For the benefit of dout
dout(20) << "validating a dirty CInode; results will be inconclusive"
- << dendl;
+ << dendl;
}
if (in->is_symlink()) {
// there's nothing to do for symlinks!
int memory_newer;
MDCache *mdcache = in->mdcache; // For the benefit of dout
- const mempool_inode& inode = in->inode; // For the benefit of dout
+ auto ino = [this]() { return in->ino(); }; // For the benefit of dout
// Ignore rval because it's the result of a FAILOK operation
// from fetch_backtrace_and_tag: the real result is in
{
InoTable *inotable = mdcache->mds->inotable;
- dout(10) << "scrub: inotable ino = " << inode.ino << dendl;
+ dout(10) << "scrub: inotable ino = " << in->ino() << dendl;
dout(10) << "scrub: inotable free says "
- << inotable->is_marked_free(inode.ino) << dendl;
+ << inotable->is_marked_free(in->ino()) << dendl;
- if (inotable->is_marked_free(inode.ino)) {
+ if (inotable->is_marked_free(in->ino())) {
LogChannelRef clog = in->mdcache->mds->clog;
- clog->error() << "scrub: inode wrongly marked free: " << inode.ino;
+ clog->error() << "scrub: inode wrongly marked free: " << in->ino();
if (in->scrub_infop->header->get_repair()) {
- bool repaired = inotable->repair(inode.ino);
+ bool repaired = inotable->repair(in->ino());
if (repaired) {
- clog->error() << "inode table repaired for inode: " << inode.ino;
+ clog->error() << "inode table repaired for inode: " << in->ino();
inotable->save();
} else {
if (in->is_base()) {
if (!shadow_in) {
shadow_in = new CInode(in->mdcache);
- in->mdcache->create_unlinked_system_inode(shadow_in, in->inode.ino, in->inode.mode);
+ in->mdcache->create_unlinked_system_inode(shadow_in, in->ino(), in->get_inode()->mode);
in->mdcache->num_shadow_inodes++;
}
shadow_in->fetch(get_internal_callback(INODE));
}
bool _inode_disk(int rval) {
+ const auto& si = shadow_in->get_inode();
+ const auto& i = in->get_inode();
+
results->inode.checked = true;
results->inode.ondisk_read_retval = rval;
- results->inode.ondisk_value = shadow_in->inode;
- results->inode.memory_value = in->inode;
+ results->inode.ondisk_value = *si;
+ results->inode.memory_value = *i;
- mempool_inode& si = shadow_in->inode;
- mempool_inode& i = in->inode;
- if (si.version > i.version) {
+ if (si->version > i->version) {
// uh, what?
results->inode.error_str << "On-disk inode is newer than in-memory one; ";
goto next;
} else {
bool divergent = false;
- int r = i.compare(si, &divergent);
+ int r = i->compare(*si, &divergent);
results->inode.passed = !divergent && r >= 0;
if (!results->inode.passed) {
results->inode.error_str <<
results->raw_stats.checked = true;
results->raw_stats.ondisk_read_retval = rval;
- results->raw_stats.memory_value.dirstat = in->inode.dirstat;
- results->raw_stats.memory_value.rstat = in->inode.rstat;
+ results->raw_stats.memory_value.dirstat = in->get_inode()->dirstat;
+ results->raw_stats.memory_value.rstat = in->get_inode()->rstat;
frag_info_t& dir_info = results->raw_stats.ondisk_value.dirstat;
nest_info_t& nest_info = results->raw_stats.ondisk_value.rstat;
nest_info.rsnaps += srnode->snaps.size();
// ...and that their sum matches our inode settings
- if (!dir_info.same_sums(in->inode.dirstat) ||
- !nest_info.same_sums(in->inode.rstat)) {
+ if (!dir_info.same_sums(in->get_inode()->dirstat) ||
+ !nest_info.same_sums(in->get_inode()->rstat)) {
if (in->scrub_infop->header->get_repair()) {
results->raw_stats.error_str
<< "freshly-calculated rstats don't match existing ones (will be fixed)";
// break out of const-land to set up implicit initial state
CInode *me = const_cast<CInode*>(this);
- mempool_inode *in = me->get_projected_inode();
+ const auto& pi = me->get_projected_inode();
scrub_info_t *si = new scrub_info_t();
- si->scrub_start_stamp = si->last_scrub_stamp = in->last_scrub_stamp;
- si->scrub_start_version = si->last_scrub_version = in->last_scrub_version;
+ si->scrub_start_stamp = si->last_scrub_stamp = pi->last_scrub_stamp;
+ si->scrub_start_version = si->last_scrub_version = pi->last_scrub_version;
me->scrub_infop = si;
}
} else {
// Files are required to have an explicit layout that specifies
// a pool
- ceph_assert(inode.layout.pool_id != -1);
- return inode.layout.pool_id;
+ ceph_assert(get_inode()->layout.pool_id != -1);
+ return get_inode()->layout.pool_id;
}
}
dout(15) << __func__ << " !dir or !normal: cannot ephemeral distributed pin " << *this << dendl;
set_ephemeral_dist(false);
return;
- } else if (get_inode().nlink == 0) {
+ } else if (get_inode()->nlink == 0) {
dout(15) << __func__ << " unlinked directory: cannot ephemeral distributed pin " << *this << dendl;
set_ephemeral_dist(false);
return;
return;
}
- bool pin = dir->get_inode()->get_inode().export_ephemeral_distributed_pin;
+ bool pin = dir->get_inode()->get_inode()->export_ephemeral_distributed_pin;
if (pin) {
dout(10) << __func__ << " ephemeral distributed pinning " << *this << dendl;
set_ephemeral_dist(true);
} else if (!is_dir() || !is_normal()) {
dout(15) << __func__ << " !dir or !normal: cannot ephemeral distributed pin " << *this << dendl;
return;
- } else if (get_inode().nlink == 0) {
+ } else if (get_inode()->nlink == 0) {
dout(15) << __func__ << " unlinked directory: cannot ephemeral distributed pin " << *this << dendl;
return;
}
- bool pin = get_inode().export_ephemeral_distributed_pin;
+ bool pin = get_inode()->export_ephemeral_distributed_pin;
/* FIXME: expensive to iterate children when not updating */
if (!pin && !update) {
return;
dout(15) << __func__ << " !dir or !normal: cannot ephemeral random pin " << *this << dendl;
set_ephemeral_rand(false);
return;
- } else if (get_inode().nlink == 0) {
+ } else if (get_inode()->nlink == 0) {
dout(15) << __func__ << " unlinked directory: cannot ephemeral random pin " << *this << dendl;
set_ephemeral_rand(false);
return;
void CInode::setxattr_ephemeral_rand(double probability)
{
ceph_assert(is_dir());
- ceph_assert(is_projected());
- get_projected_inode()->export_ephemeral_random_pin = probability;
+ _get_projected_inode()->export_ephemeral_random_pin = probability;
}
void CInode::setxattr_ephemeral_dist(bool val)
{
ceph_assert(is_dir());
- ceph_assert(is_projected());
- get_projected_inode()->export_ephemeral_distributed_pin = val;
+ _get_projected_inode()->export_ephemeral_distributed_pin = val;
}
void CInode::set_export_pin(mds_rank_t rank)
{
ceph_assert(is_dir());
- ceph_assert(is_projected());
- get_projected_inode()->export_pin = rank;
+ _get_projected_inode()->export_pin = rank;
+ maybe_export_pin(true);
}
void CInode::check_pin_policy()
const CDentry *pdn = in->get_parent_dn();
if (!pdn)
break;
- if (in->get_inode().nlink == 0) {
+ if (in->get_inode()->nlink == 0) {
// ignore export pin for unlinked directory
return;
} else if (etarget != MDS_RANK_NONE && in->has_ephemeral_policy()) {
return;
- } else if (in->get_inode().export_pin >= 0) {
+ } else if (in->get_inode()->export_pin >= 0) {
/* clear any epin policy */
set_ephemeral_dist(false);
set_ephemeral_rand(false);
const CDentry *pdn = in->get_parent_dn();
if (!pdn)
break;
- if (in->get_inode().nlink == 0) {
+ if (in->get_inode()->nlink == 0) {
// ignore export pin for unlinked directory
return MDS_RANK_NONE;
} else if (etarget != MDS_RANK_NONE && in->has_ephemeral_policy()) {
return etarget;
- } else if (in->get_inode().export_pin >= 0) {
- return in->get_inode().export_pin;
+ } else if (in->get_inode()->export_pin >= 0) {
+ return in->get_inode()->export_pin;
} else if (etarget == MDS_RANK_NONE && ephemeral && in->is_ephemerally_pinned()) {
/* If a parent overrides a grandparent ephemeral pin policy with an export pin, we use that export pin instead. */
etarget = mdcache->hash_into_rank_bucket(in->ino());
if (!pdn)
break;
// ignore export pin for unlinked directory
- if (in->get_inode().nlink == 0)
+ if (in->get_inode()->nlink == 0)
break;
- if (in->get_inode().export_ephemeral_random_pin > 0.0)
- return std::min(in->get_inode().export_ephemeral_random_pin, max);
+ if (in->get_inode()->export_ephemeral_random_pin > 0.0)
+ return std::min(in->get_inode()->export_ephemeral_random_pin, max);
/* An export_pin overrides only if no closer parent (incl. this one) has a
* random pin set.
*/
- if (in->get_inode().export_pin >= 0)
+ if (in->get_inode()->export_pin >= 0)
return 0.0;
if (!inherit)
#include <string_view>
#include "common/config.h"
+#include "common/RefCountedObj.h"
#include "include/counter.h"
#include "include/elist.h"
#include "include/types.h"
*/
class InodeStoreBase {
public:
- typedef inode_t<mempool::mds_co::pool_allocator> mempool_inode;
- typedef old_inode_t<mempool::mds_co::pool_allocator> mempool_old_inode;
- typedef mempool::mds_co::compact_map<snapid_t, mempool_old_inode> mempool_old_inode_map;
- typedef xattr_map<mempool::mds_co::pool_allocator> mempool_xattr_map; // FIXME bufferptr not in mempool
+ using mempool_inode = inode_t<mempool::mds_co::pool_allocator>;
+ using inode_ptr = std::shared_ptr<mempool_inode>;
+ using inode_const_ptr = std::shared_ptr<const mempool_inode>;
+
+ template <typename ...Args>
+ static inode_ptr allocate_inode(Args && ...args) {
+ static mempool::mds_co::pool_allocator<mempool_inode> allocator;
+ return std::allocate_shared<mempool_inode>(allocator, std::forward<Args>(args)...);
+ }
+
+ using mempool_xattr_map = xattr_map<mempool::mds_co::pool_allocator>; // FIXME bufferptr not in mempool
+ using xattr_map_ptr = std::shared_ptr<mempool_xattr_map>;
+ using xattr_map_const_ptr = std::shared_ptr<const mempool_xattr_map>;
+
+ template <typename ...Args>
+ static xattr_map_ptr allocate_xattr_map(Args && ...args) {
+ static mempool::mds_co::pool_allocator<mempool_xattr_map> allocator;
+ return std::allocate_shared<mempool_xattr_map>(allocator, std::forward<Args>(args)...);
+ }
+
+ using mempool_old_inode = old_inode_t<mempool::mds_co::pool_allocator>;
+ using mempool_old_inode_map = mempool::mds_co::map<snapid_t, mempool_old_inode>;
+ using old_inode_map_ptr = std::shared_ptr<mempool_old_inode_map>;
+ using old_inode_map_const_ptr = std::shared_ptr<const mempool_old_inode_map>;
+
+ template <typename ...Args>
+ static old_inode_map_ptr allocate_old_inode_map(Args && ...args) {
+ static mempool::mds_co::pool_allocator<mempool_old_inode_map> allocator;
+ return std::allocate_shared<mempool_old_inode_map>(allocator, std::forward<Args>(args)...);
+ }
- InodeStoreBase() {}
+ void reset_inode(inode_const_ptr&& ptr) {
+ inode = std::move(ptr);
+ }
+
+ void reset_xattrs(xattr_map_const_ptr&& ptr) {
+ xattrs = std::move(ptr);
+ }
+
+ void reset_old_inodes(old_inode_map_const_ptr&& ptr) {
+ old_inodes = std::move(ptr);
+ }
+
+ void encode_xattrs(bufferlist &bl) const;
+ void decode_xattrs(bufferlist::const_iterator &p);
+ void encode_old_inodes(bufferlist &bl, uint64_t features) const;
+ void decode_old_inodes(bufferlist::const_iterator &p);
/* Helpers */
- bool is_file() const { return inode.is_file(); }
- bool is_symlink() const { return inode.is_symlink(); }
- bool is_dir() const { return inode.is_dir(); }
static object_t get_object_name(inodeno_t ino, frag_t fg, std::string_view suffix);
/* Full serialization for use in ".inode" root inode objects */
__u32 hash_dentry_name(std::string_view dn);
frag_t pick_dirfrag(std::string_view dn);
- mempool_inode inode; // the inode itself
- mempool::mds_co::string symlink; // symlink dest, if symlink
- mempool_xattr_map xattrs;
- fragtree_t dirfragtree; // dir frag tree, if any. always consistent with our dirfrag map.
- mempool_old_inode_map old_inodes; // key = last, value.first = first
- snapid_t oldest_snap = CEPH_NOSNAP;
- damage_flags_t damage_flags = 0;
+ mempool::mds_co::string symlink; // symlink dest, if symlink
+ fragtree_t dirfragtree; // dir frag tree, if any. always consistent with our dirfrag map.
+ snapid_t oldest_snap = CEPH_NOSNAP;
+ damage_flags_t damage_flags = 0;
+
+protected:
+ static inode_const_ptr empty_inode;
+
+ // Following members are pointers to constant data, the constant data can
+ // be shared by CInode and log events. To update these members in CInode,
+ // read-copy-update should be used.
+ inode_const_ptr inode = empty_inode;
+ xattr_map_const_ptr xattrs;
+ old_inode_map_const_ptr old_inodes; // key = last, value.first = first
};
inline void decode_noshare(InodeStoreBase::mempool_xattr_map& xattrs,
class InodeStore : public InodeStoreBase {
public:
+ mempool_inode* get_inode() {
+ if (inode == empty_inode)
+ reset_inode(allocate_inode());
+ return const_cast<mempool_inode*>(inode.get());
+ }
+ mempool_xattr_map* get_xattrs() { return const_cast<mempool_xattr_map*>(xattrs.get()); }
+
void encode(ceph::buffer::list &bl, uint64_t features) const {
InodeStoreBase::encode(bl, features, &snap_blob);
}
static void generate_test_instances(std::list<InodeStore*>& ls);
- // FIXME ceph::buffer::list not part of mempool
+ using InodeStoreBase::inode;
+ using InodeStoreBase::xattrs;
+ using InodeStoreBase::old_inodes;
+
+ // FIXME bufferlist not part of mempool
ceph::buffer::list snap_blob; // Encoded copy of SnapRealm, because we can't
// rehydrate it without full MDCache
};
ScrubHeaderRef header;
};
- /**
- * Projection methods, used to store inode changes until they have been journaled,
- * at which point they are popped.
- * Usage:
- * project_inode as needed. If you're changing xattrs or sr_t, then pass true
- * as needed then change the xattrs/snapnode member as needed. (Dirty
- * exception: project_past_snaprealm_parent allows you to project the
- * snapnode after doing project_inode (i.e. you don't need to pass
- * snap=true).
- *
- * Then, journal. Once journaling is done, pop_and_dirty_projected_inode.
- * This function will take care of the inode itself, the xattrs, and the snaprealm.
- */
-
- class projected_inode {
- public:
- static sr_t* const UNDEF_SRNODE;
-
- projected_inode() = delete;
- explicit projected_inode(const mempool_inode &in) : inode(in) {}
-
- mempool_inode inode;
- std::unique_ptr<mempool_xattr_map> xattrs;
- sr_t *snapnode = UNDEF_SRNODE;
- };
-
// -- pins --
static const int PIN_DIRFRAG = -1;
static const int PIN_CAPS = 2; // client caps
close_dirfrags();
close_snaprealm();
clear_file_locks();
- ceph_assert(num_projected_xattrs == 0);
ceph_assert(num_projected_srnodes == 0);
ceph_assert(num_caps_wanted == 0);
ceph_assert(num_subtree_roots == 0);
bool is_multiversion() const {
return snaprealm || // other snaprealms will link to me
- inode.is_dir() || // links to me in other snaps
- inode.nlink > 1 || // there are remote links, possibly snapped, that will need to find me
- !old_inodes.empty(); // once multiversion, always multiversion. until old_inodes gets cleaned out.
+ get_inode()->is_dir() || // links to me in other snaps
+ get_inode()->nlink > 1 || // there are remote links, possibly snapped, that will need to find me
+ is_any_old_inodes(); // once multiversion, always multiversion. until old_inodes gets cleaned out.
}
snapid_t get_oldest_snap();
void mark_dirty_rstat();
void clear_dirty_rstat();
- CInode::projected_inode &project_inode(bool xattr = false, bool snap = false);
- void pop_and_dirty_projected_inode(LogSegment *ls);
+ //bool hack_accessed = false;
+ //utime_t hack_load_stamp;
- projected_inode *get_projected_node() {
- if (projected_nodes.empty())
- return NULL;
- else
- return &projected_nodes.back();
- }
+ /**
+ * Projection methods, used to store inode changes until they have been journaled,
+ * at which point they are popped.
+ * Usage:
+ * project_inode as needed. If you're changing xattrs or sr_t, then pass true
+ * as needed then change the xattrs/snapnode member as needed. (Dirty
+ * exception: project_past_snaprealm_parent allows you to project the
+ * snapnode after doing project_inode (i.e. you don't need to pass
+ * snap=true).
+ *
+ * Then, journal. Once journaling is done, pop_and_dirty_projected_inode.
+ * This function will take care of the inode itself, the xattrs, and the snaprealm.
+ */
+
+ struct projected_inode {
+ static sr_t* const UNDEF_SRNODE;
+
+ inode_ptr const inode;
+ xattr_map_ptr const xattrs;
+ sr_t* const snapnode;
+
+ projected_inode() = delete;
+ explicit projected_inode(inode_ptr&& i, xattr_map_ptr&& x, sr_t *s) :
+ inode(std::move(i)), xattrs(std::move(x)), snapnode(s) {}
+ };
+ projected_inode project_inode(bool xattr = false, bool snap = false);
+
+ void pop_and_dirty_projected_inode(LogSegment *ls);
version_t get_projected_version() const {
if (projected_nodes.empty())
- return inode.version;
+ return get_inode()->version;
else
- return projected_nodes.back().inode.version;
+ return projected_nodes.back().inode->version;
}
bool is_projected() const {
return !projected_nodes.empty();
}
- const mempool_inode *get_projected_inode() const {
+ const inode_const_ptr& get_projected_inode() const {
if (projected_nodes.empty())
- return &inode;
+ return get_inode();
else
- return &projected_nodes.back().inode;
+ return projected_nodes.back().inode;
+ }
+ // inode should have already been projected in caller's context
+ mempool_inode* _get_projected_inode() {
+ ceph_assert(!projected_nodes.empty());
+ return const_cast<mempool_inode*>(projected_nodes.back().inode.get());
}
- mempool_inode *get_projected_inode() {
+ const inode_const_ptr& get_previous_projected_inode() const {
+ ceph_assert(!projected_nodes.empty());
+ auto it = projected_nodes.rbegin();
+ ++it;
+ if (it != projected_nodes.rend())
+ return it->inode;
+ else
+ return get_inode();
+ }
+
+ const xattr_map_const_ptr& get_projected_xattrs() {
if (projected_nodes.empty())
- return &inode;
+ return xattrs;
else
- return &projected_nodes.back().inode;
+ return projected_nodes.back().xattrs;
}
- mempool_inode *get_previous_projected_inode() {
+ const xattr_map_const_ptr& get_previous_projected_xattrs() {
ceph_assert(!projected_nodes.empty());
auto it = projected_nodes.rbegin();
++it;
if (it != projected_nodes.rend())
- return &it->inode;
+ return it->xattrs;
else
- return &inode;
+ return xattrs;
}
- mempool_xattr_map *get_projected_xattrs();
- mempool_xattr_map *get_previous_projected_xattrs();
-
sr_t *prepare_new_srnode(snapid_t snapid);
void project_snaprealm(sr_t *new_srnode);
sr_t *project_snaprealm(snapid_t snapid=0) {
void project_snaprealm_past_parent(SnapRealm *newparent);
void early_pop_projected_snaprealm();
- mempool_old_inode& cow_old_inode(snapid_t follows, bool cow_head);
+ const mempool_old_inode& cow_old_inode(snapid_t follows, bool cow_head);
void split_old_inode(snapid_t snap);
- mempool_old_inode *pick_old_inode(snapid_t last);
+ snapid_t pick_old_inode(snapid_t last) const;
void pre_cow_old_inode();
bool has_snap_data(snapid_t s);
void purge_stale_snap_data(const std::set<snapid_t>& snaps);
std::pair<bool,bool> split_need_snapflush(CInode *cowin, CInode *in);
// -- accessors --
- bool is_root() const { return inode.ino == MDS_INO_ROOT; }
- bool is_stray() const { return MDS_INO_IS_STRAY(inode.ino); }
+
+ inodeno_t ino() const { return get_inode()->ino; }
+ vinodeno_t vino() const { return vinodeno_t(ino(), last); }
+ int d_type() const { return IFTODT(get_inode()->mode); }
+ bool is_root() const { return ino() == MDS_INO_ROOT; }
+ bool is_stray() const { return MDS_INO_IS_STRAY(ino()); }
mds_rank_t get_stray_owner() const {
- return (mds_rank_t)MDS_INO_STRAY_OWNER(inode.ino);
+ return (mds_rank_t)MDS_INO_STRAY_OWNER(ino());
}
- bool is_mdsdir() const { return MDS_INO_IS_MDSDIR(inode.ino); }
- bool is_base() const { return MDS_INO_IS_BASE(inode.ino); }
- bool is_system() const { return inode.ino < MDS_INO_SYSTEM_BASE; }
+ bool is_mdsdir() const { return MDS_INO_IS_MDSDIR(ino()); }
+ bool is_base() const { return MDS_INO_IS_BASE(ino()); }
+ bool is_system() const { return ino() < MDS_INO_SYSTEM_BASE; }
bool is_normal() const { return !(is_base() || is_system() || is_stray()); }
+ bool is_file() const { return get_inode()->is_file(); }
+ bool is_symlink() const { return get_inode()->is_symlink(); }
+ bool is_dir() const { return get_inode()->is_dir(); }
bool is_head() const { return last == CEPH_NOSNAP; }
void clear_ambiguous_auth(MDSContext::vec& finished);
void clear_ambiguous_auth();
- inodeno_t ino() const { return inode.ino; }
- vinodeno_t vino() const { return vinodeno_t(inode.ino, last); }
- int d_type() const { return IFTODT(inode.mode); }
+ const inode_const_ptr& get_inode() const {
+ return inode;
+ }
+
+ // only used for updating newly allocated CInode
+ mempool_inode* _get_inode() {
+ if (inode == empty_inode)
+ reset_inode(allocate_inode());
+ return const_cast<mempool_inode*>(inode.get());
+ }
+
+ const xattr_map_const_ptr& get_xattrs() const { return xattrs; }
+
+ bool is_any_old_inodes() const { return old_inodes && !old_inodes->empty(); }
+ const old_inode_map_const_ptr& get_old_inodes() const { return old_inodes; }
- mempool_inode& get_inode() { return inode; }
- const mempool_inode& get_inode() const { return inode; }
CDentry* get_parent_dn() { return parent; }
const CDentry* get_parent_dn() const { return parent; }
CDentry* get_projected_parent_dn() { return !projected_parent.empty() ? projected_parent.back() : parent; }
void name_stray_dentry(std::string& dname);
// -- dirtyness --
- version_t get_version() const { return inode.version; }
+ version_t get_version() const { return get_inode()->version; }
version_t pre_dirty();
void _mark_dirty(LogSegment *ls);
- void mark_dirty(version_t projected_dirv, LogSegment *ls);
+ void mark_dirty(LogSegment *ls);
void mark_clean();
void store(MDSContext *fin);
int get_caps_allowed_by_type(int type) const;
int get_caps_careful() const;
int get_xlocker_mask(client_t client) const;
- int get_caps_allowed_for_client(Session *s, Capability *cap, mempool_inode *file_i) const;
+ int get_caps_allowed_for_client(Session *s, Capability *cap,
+ const mempool_inode *file_i) const;
// caps issued, wanted
int get_caps_issued(int *ploner = 0, int *pother = 0, int *pxlocker = 0,
}
bool has_ephemeral_policy() const {
- return get_inode().export_ephemeral_random_pin > 0.0 ||
- get_inode().export_ephemeral_distributed_pin;
+ return get_inode()->export_ephemeral_random_pin > 0.0 ||
+ get_inode()->export_ephemeral_distributed_pin;
}
bool is_ephemerally_pinned() const {
return state_test(STATE_DISTEPHEMERALPIN) ||
bool _validate_disk_state(class ValidationContinuation *c,
int rval, int stage);
- mempool::mds_co::list<projected_inode> projected_nodes; // projected values (only defined while dirty)
- size_t num_projected_xattrs = 0;
+ struct projected_const_node {
+ inode_const_ptr inode;
+ xattr_map_const_ptr xattrs;
+ sr_t *snapnode;
+
+ projected_const_node() = delete;
+ projected_const_node(projected_const_node&&) = default;
+ explicit projected_const_node(const inode_const_ptr& i, const xattr_map_const_ptr& x, sr_t *s) :
+ inode(i), xattrs(x), snapnode(s) {}
+ };
+
+ mempool::mds_co::list<projected_const_node> projected_nodes; // projected values (only defined while dirty)
size_t num_projected_srnodes = 0;
// -- cache infrastructure --
version_t Locker::issue_file_data_version(CInode *in)
{
dout(7) << "issue_file_data_version on " << *in << dendl;
- return in->inode.file_data_version;
+ return in->get_inode()->file_data_version;
}
class C_Locker_FileUpdate_finish : public LockerLogContext {
allowed |= cap->get_lock_cache_allowed();
}
- if ((in->inode.inline_data.version != CEPH_INLINE_NONE &&
+ if ((in->get_inode()->inline_data.version != CEPH_INLINE_NONE &&
cap->is_noinline()) ||
- (!in->inode.layout.pool_ns.empty() &&
+ (!in->get_inode()->layout.pool_ns.empty() &&
cap->is_nopoolns()))
allowed &= ~(CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR);
}
// notify clients about deleted inode, to make sure they release caps ASAP.
- if (in->inode.nlink == 0)
+ if (in->get_inode()->nlink == 0)
wanted |= CEPH_CAP_LINK_SHARED;
// are there caps that the client _wants_ and can have, but aren't pending?
cap->revoke();
- if (in->is_auth() && in->inode.client_ranges.count(cap->get_client()))
+ if (in->is_auth() && in->get_inode()->client_ranges.count(cap->get_client()))
in->state_set(CInode::STATE_NEEDSRECOVER);
if (in->state_test(CInode::STATE_EXPORTINGCAPS))
eval_lock_caches(cap);
if (in->is_auth() &&
- in->inode.client_ranges.count(cap->get_client()))
+ in->get_inode()->client_ranges.count(cap->get_client()))
in->state_set(CInode::STATE_NEEDSRECOVER);
// eval lock/inode may finish contexts, which may modify other cap's position
}
};
-uint64_t Locker::calc_new_max_size(CInode::mempool_inode *pi, uint64_t size)
+uint64_t Locker::calc_new_max_size(const CInode::inode_const_ptr &pi, uint64_t size)
{
uint64_t new_max = (size + 1) << 1;
uint64_t max_inc = g_conf()->mds_client_writeable_range_max_inc_objs;
CInode::mempool_inode::client_range_map *new_ranges,
bool *max_increased)
{
- auto latest = in->get_projected_inode();
+ const auto& latest = in->get_projected_inode();
uint64_t ms;
if (latest->has_layout()) {
ms = calc_new_max_size(latest, size);
if ((p.second.issued() | p.second.wanted()) & CEPH_CAP_ANY_FILE_WR) {
client_writeable_range_t& nr = (*new_ranges)[p.first];
nr.range.first = 0;
- if (latest->client_ranges.count(p.first)) {
- client_writeable_range_t& oldr = latest->client_ranges[p.first];
+ auto it = latest->client_ranges.find(p.first);
+ if (it != latest->client_ranges.end()) {
+ const client_writeable_range_t& oldr = it->second;
if (ms > oldr.range.last)
*max_increased = true;
nr.range.last = std::max(ms, oldr.range.last);
ceph_assert(in->is_auth());
ceph_assert(in->is_file());
- CInode::mempool_inode *latest = in->get_projected_inode();
+ const auto& latest = in->get_projected_inode();
CInode::mempool_inode::client_range_map new_ranges;
uint64_t size = latest->size;
bool update_size = new_size > 0;
MutationRef mut(new MutationImpl());
mut->ls = mds->mdlog->get_current_segment();
- auto &pi = in->project_inode();
- pi.inode.version = in->pre_dirty();
+ auto pi = in->project_inode();
+ pi.inode->version = in->pre_dirty();
if (update_max) {
- dout(10) << "check_inode_max_size client_ranges " << pi.inode.client_ranges << " -> " << new_ranges << dendl;
- pi.inode.client_ranges = new_ranges;
+ dout(10) << "check_inode_max_size client_ranges " << pi.inode->client_ranges << " -> " << new_ranges << dendl;
+ pi.inode->client_ranges = new_ranges;
}
if (update_size) {
- dout(10) << "check_inode_max_size size " << pi.inode.size << " -> " << new_size << dendl;
- pi.inode.size = new_size;
- pi.inode.rstat.rbytes = new_size;
- dout(10) << "check_inode_max_size mtime " << pi.inode.mtime << " -> " << new_mtime << dendl;
- pi.inode.mtime = new_mtime;
- if (new_mtime > pi.inode.ctime) {
- pi.inode.ctime = new_mtime;
- if (new_mtime > pi.inode.rstat.rctime)
- pi.inode.rstat.rctime = new_mtime;
+ dout(10) << "check_inode_max_size size " << pi.inode->size << " -> " << new_size << dendl;
+ pi.inode->size = new_size;
+ pi.inode->rstat.rbytes = new_size;
+ dout(10) << "check_inode_max_size mtime " << pi.inode->mtime << " -> " << new_mtime << dendl;
+ pi.inode->mtime = new_mtime;
+ if (new_mtime > pi.inode->ctime) {
+ pi.inode->ctime = new_mtime;
+ if (new_mtime > pi.inode->rstat.rctime)
+ pi.inode->rstat.rctime = new_mtime;
}
}
m->xattrbl.length() &&
m->head.xattr_version > in->get_projected_inode()->xattr_version;
- CInode::mempool_old_inode *oi = 0;
- if (in->is_multiversion()) {
- oi = in->pick_old_inode(snap);
+ CInode::mempool_old_inode *oi = nullptr;
+ CInode::old_inode_map_ptr _old_inodes;
+ if (in->is_any_old_inodes()) {
+ auto last = in->pick_old_inode(snap);
+ if (last) {
+ _old_inodes = CInode::allocate_old_inode_map(*in->get_old_inodes());
+ oi = &_old_inodes->at(last);
+ if (snap > oi->first) {
+ (*_old_inodes)[snap - 1] = *oi;;
+ oi->first = snap;
+ }
+ }
}
CInode::mempool_inode *i;
if (oi) {
dout(10) << " writing into old inode" << dendl;
- auto &pi = in->project_inode();
- pi.inode.version = in->pre_dirty();
- if (snap > oi->first)
- in->split_old_inode(snap);
+ auto pi = in->project_inode();
+ pi.inode->version = in->pre_dirty();
i = &oi->inode;
if (xattrs)
px = &oi->xattrs;
} else {
- auto &pi = in->project_inode(xattrs);
- pi.inode.version = in->pre_dirty();
- i = &pi.inode;
+ auto pi = in->project_inode(xattrs);
+ pi.inode->version = in->pre_dirty();
+ i = pi.inode.get();
if (xattrs)
px = pi.xattrs.get();
}
}
}
+ if (_old_inodes)
+ in->reset_old_inodes(std::move(_old_inodes));
+
mut->auth_pin(in);
mdcache->predirty_journal_parents(mut, &le->metablob, in, 0, PREDIRTY_PRIMARY, 0, follows);
mdcache->journal_dirty_inode(mut.get(), &le->metablob, in, follows);
if (mtime > pi->rstat.rctime)
pi->rstat.rctime = mtime;
}
- if (in->inode.is_file() && // ONLY if regular file
+ if (in->is_file() && // ONLY if regular file
size > pi->size) {
dout(7) << " size " << pi->size << " -> " << size
<< " for " << *in << dendl;
pi->size = size;
pi->rstat.rbytes = size;
}
- if (in->inode.is_file() &&
+ if (in->is_file() &&
(dirty & CEPH_CAP_FILE_WR) &&
inline_version > pi->inline_data.version) {
pi->inline_data.version = inline_version;
if (inline_version != CEPH_INLINE_NONE && m->inline_data.length() > 0)
- pi->inline_data.get_data() = m->inline_data;
+ pi->inline_data.set_data(m->inline_data);
else
pi->inline_data.free_data();
}
<< " on " << *in << dendl;
ceph_assert(in->is_auth());
client_t client = m->get_source().num();
- CInode::mempool_inode *latest = in->get_projected_inode();
+ const auto& latest = in->get_projected_inode();
// increase or zero max_size?
uint64_t size = m->get_size();
bool change_max = false;
- uint64_t old_max = latest->client_ranges.count(client) ? latest->client_ranges[client].range.last : 0;
+ uint64_t old_max;
+ {
+ auto it = latest->client_ranges.find(client);
+ old_max = it != latest->client_ranges.end() ? it->second.range.last: 0;
+ }
uint64_t new_max = old_max;
if (in->is_file()) {
m->xattrbl.length() &&
m->head.xattr_version > in->get_projected_inode()->xattr_version;
- auto &pi = in->project_inode(xattr);
- pi.inode.version = in->pre_dirty();
+ auto pi = in->project_inode(xattr);
+ pi.inode->version = in->pre_dirty();
MutationRef mut(new MutationImpl());
mut->ls = mds->mdlog->get_current_segment();
- _update_cap_fields(in, dirty, m, &pi.inode);
+ _update_cap_fields(in, dirty, m, pi.inode.get());
if (change_max) {
dout(7) << " max_size " << old_max << " -> " << new_max
<< " for " << *in << dendl;
if (new_max) {
- auto &cr = pi.inode.client_ranges[client];
+ auto &cr = pi.inode->client_ranges[client];
cr.range.first = 0;
cr.range.last = new_max;
cr.follows = in->first - 1;
if (cap)
cap->mark_clientwriteable();
} else {
- pi.inode.client_ranges.erase(client);
+ pi.inode->client_ranges.erase(client);
if (cap)
cap->clear_clientwriteable();
}
// xattrs update?
if (xattr) {
- dout(7) << " xattrs v" << pi.inode.xattr_version << " -> " << m->head.xattr_version << dendl;
- pi.inode.xattr_version = m->head.xattr_version;
+ dout(7) << " xattrs v" << pi.inode->xattr_version << " -> " << m->head.xattr_version << dendl;
+ pi.inode->xattr_version = m->head.xattr_version;
auto p = m->xattrbl.cbegin();
decode_noshare(*pi.xattrs, p);
wrlock_force(&in->xattrlock, mut);
if (in->is_auth()) {
// make sure we clear out the client byte range
if (in->get_projected_inode()->client_ranges.count(client) &&
- !(in->inode.nlink == 0 && !in->is_any_caps())) { // unless it's unlink + stray
+ !(in->get_inode()->nlink == 0 && !in->is_any_caps())) { // unless it's unlink + stray
if (kill)
in->state_set(CInode::STATE_NEEDSRECOVER);
else
void Locker::scatter_writebehind(ScatterLock *lock)
{
CInode *in = static_cast<CInode*>(lock->get_parent());
- dout(10) << "scatter_writebehind " << in->inode.mtime << " on " << *lock << " on " << *in << dendl;
+ dout(10) << "scatter_writebehind " << in->get_inode()->mtime << " on " << *lock << " on " << *in << dendl;
// journal
MutationRef mut(new MutationImpl());
in->pre_cow_old_inode(); // avoid cow mayhem
- auto &pi = in->project_inode();
- pi.inode.version = in->pre_dirty();
+ auto pi = in->project_inode();
+ pi.inode->version = in->pre_dirty();
in->finish_scatter_gather_update(lock->get_type());
lock->start_flush();
friend class LockerLogContext;
bool any_late_revoking_caps(xlist<Capability*> const &revoking, double timeout) const;
- uint64_t calc_new_max_size(CInode::mempool_inode *pi, uint64_t size);
+ uint64_t calc_new_max_size(const CInode::inode_const_ptr& pi, uint64_t size);
MDSRank *mds;
MDCache *mdcache;
mds->logger->set(l_mds_inodes_with_caps, num_inodes_with_caps);
mds->logger->set(l_mds_caps, Capability::count());
if (root) {
- mds->logger->set(l_mds_root_rfiles, root->inode.rstat.rfiles);
- mds->logger->set(l_mds_root_rbytes, root->inode.rstat.rbytes);
- mds->logger->set(l_mds_root_rsnaps, root->inode.rstat.rsnaps);
+ mds->logger->set(l_mds_root_rfiles, root->get_inode()->rstat.rfiles);
+ mds->logger->set(l_mds_root_rbytes, root->get_inode()->rstat.rbytes);
+ mds->logger->set(l_mds_root_rsnaps, root->get_inode()->rstat.rsnaps);
}
}
default_log_layout = gen_default_log_layout(*(mds->mdsmap));
}
-void MDCache::create_unlinked_system_inode(CInode *in, inodeno_t ino,
- int mode) const
+void MDCache::create_unlinked_system_inode(CInode *in, inodeno_t ino, int mode) const
{
- in->inode.ino = ino;
- in->inode.version = 1;
- in->inode.xattr_version = 1;
- in->inode.mode = 0500 | mode;
- in->inode.size = 0;
- in->inode.ctime =
- in->inode.mtime =
- in->inode.btime = ceph_clock_now();
- in->inode.nlink = 1;
- in->inode.truncate_size = -1ull;
- in->inode.change_attr = 0;
- in->inode.export_pin = MDS_RANK_NONE;
+ auto _inode = in->_get_inode();
+ _inode->ino = ino;
+ _inode->version = 1;
+ _inode->xattr_version = 1;
+ _inode->mode = 0500 | mode;
+ _inode->size = 0;
+ _inode->ctime = _inode->mtime = _inode->btime = ceph_clock_now();
+ _inode->nlink = 1;
+ _inode->truncate_size = -1ull;
+ _inode->change_attr = 0;
+ _inode->export_pin = MDS_RANK_NONE;
// FIPS zeroization audit 20191117: this memset is not security related.
- memset(&in->inode.dir_layout, 0, sizeof(in->inode.dir_layout));
- if (in->inode.is_dir()) {
- in->inode.dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash;
- in->inode.rstat.rsubdirs = 1; /* itself */
- in->inode.rstat.rctime = in->inode.ctime;
+ memset(&_inode->dir_layout, 0, sizeof(_inode->dir_layout));
+ if (_inode->is_dir()) {
+ _inode->dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash;
+ _inode->rstat.rsubdirs = 1; /* itself */
+ _inode->rstat.rctime = in->get_inode()->ctime;
} else {
- in->inode.layout = default_file_layout;
- ++in->inode.rstat.rfiles;
+ _inode->layout = default_file_layout;
+ ++_inode->rstat.rfiles;
}
- in->inode.accounted_rstat = in->inode.rstat;
+ _inode->accounted_rstat = _inode->rstat;
if (in->is_base()) {
if (in->is_root())
CInode *MDCache::create_root_inode()
{
- CInode *i = create_system_inode(MDS_INO_ROOT, S_IFDIR|0755);
- i->inode.uid = g_conf()->mds_root_ino_uid;
- i->inode.gid = g_conf()->mds_root_ino_gid;
- i->inode.layout = default_file_layout;
- i->inode.layout.pool_id = mds->mdsmap->get_first_data_pool();
- return i;
+ CInode *in = create_system_inode(MDS_INO_ROOT, S_IFDIR|0755);
+ auto _inode = in->_get_inode();
+ _inode->uid = g_conf()->mds_root_ino_uid;
+ _inode->gid = g_conf()->mds_root_ino_gid;
+ _inode->layout = default_file_layout;
+ _inode->layout.pool_id = mds->mdsmap->get_first_data_pool();
+ return in;
}
void MDCache::create_empty_hierarchy(MDSGather *gather)
rootdir->dir_rep = CDir::REP_ALL; //NONE;
ceph_assert(rootdir->fnode.accounted_fragstat == rootdir->fnode.fragstat);
- ceph_assert(rootdir->fnode.fragstat == root->inode.dirstat);
+ ceph_assert(rootdir->fnode.fragstat == root->get_inode()->dirstat);
ceph_assert(rootdir->fnode.accounted_rstat == rootdir->fnode.rstat);
/* Do no update rootdir rstat information of the fragment, rstat upkeep magic
* assume version 0 is stale/invalid.
rootdir->mark_dirty(rootdir->pre_dirty(), mds->mdlog->get_current_segment());
rootdir->commit(0, gather->new_sub());
- root->mark_clean();
- root->mark_dirty(root->pre_dirty(), mds->mdlog->get_current_segment());
- root->mark_dirty_parent(mds->mdlog->get_current_segment(), true);
- root->flush(gather->new_sub());
+ root->store(gather->new_sub());
}
void MDCache::create_mydir_hierarchy(MDSGather *gather)
CDentry *sdn = mydir->add_primary_dentry(name.str(), stray);
sdn->_mark_dirty(mds->mdlog->get_current_segment());
- stray->inode.dirstat = straydir->fnode.fragstat;
+ stray->_get_inode()->dirstat = straydir->fnode.fragstat;
- mydir->fnode.rstat.add(stray->inode.rstat);
+ mydir->fnode.rstat.add(stray->get_inode()->rstat);
mydir->fnode.fragstat.nsubdirs++;
// save them
straydir->mark_complete();
mydir->fnode.accounted_fragstat = mydir->fnode.fragstat;
mydir->fnode.accounted_rstat = mydir->fnode.rstat;
- myin->inode.dirstat = mydir->fnode.fragstat;
- myin->inode.rstat = mydir->fnode.rstat;
- ++myin->inode.rstat.rsubdirs;
- myin->inode.accounted_rstat = myin->inode.rstat;
+ auto inode = myin->_get_inode();
+ inode->dirstat = mydir->fnode.fragstat;
+ inode->rstat = mydir->fnode.rstat;
+ ++inode->rstat.rsubdirs;
+ inode->accounted_rstat = inode->rstat;
mydir->mark_complete();
mydir->mark_dirty(mydir->pre_dirty(), ls);
version_t dpv = dn->pre_dirty();
CDir *mdir = 0;
- if (in->inode.is_dir()) {
- in->inode.rstat.rsubdirs = 1;
+ auto inode = in->_get_inode();
+ if (in->is_dir()) {
+ inode->rstat.rsubdirs = 1;
mdir = in->get_or_open_dirfrag(this, frag_t());
mdir->mark_complete();
mdir->pre_dirty();
} else
- in->inode.rstat.rfiles = 1;
- in->inode.version = dn->pre_dirty();
+ inode->rstat.rfiles = 1;
+ inode->version = dn->pre_dirty();
SnapRealm *realm = dir->get_inode()->find_snaprealm();
dn->first = in->first = realm->get_newest_seq() + 1;
dn->mark_dirty(dpv, mut->ls);
CInode *in = dn->get_linkage()->get_inode();
- in->inode.version--;
- in->mark_dirty(in->inode.version + 1, mut->ls);
+ in->mark_dirty(mut->ls);
- if (in->inode.is_dir()) {
+ if (in->is_dir()) {
CDir *dir = in->get_dirfrag(frag_t());
ceph_assert(dir);
dir->mark_dirty(1, mut->ls);
ceph_assert(last >= in->first);
CInode *oldin = new CInode(this, true, in->first, last);
- oldin->inode = *in->get_previous_projected_inode();
- oldin->xattrs = *in->get_previous_projected_xattrs();
+ auto _inode = CInode::allocate_inode(*in->get_previous_projected_inode());
+ _inode->trim_client_ranges(last);
+ oldin->reset_inode(std::move(_inode));
+ auto _xattrs = in->get_previous_projected_xattrs();
+ oldin->reset_xattrs(std::move(_xattrs));
+
oldin->symlink = in->symlink;
- oldin->inode.trim_client_ranges(last);
if (in->first < in->oldest_snap)
in->oldest_snap = in->first;
if (pcow_inode)
*pcow_inode = oldin;
CDentry *olddn = dn->dir->add_primary_dentry(dn->get_name(), oldin, oldfirst, follows);
- oldin->inode.version = olddn->pre_dirty();
+ oldin->_get_inode()->version = olddn->pre_dirty();
dout(10) << " olddn " << *olddn << dendl;
bool need_snapflush = !oldin->client_snap_caps.empty();
if (need_snapflush) {
int linkunlink, SnapRealm *prealm)
{
CDentry *parentdn = cur->get_projected_parent_dn();
- CInode::mempool_inode *curi = cur->get_projected_inode();
if (cur->first > first)
first = cur->first;
ceph_assert(cur->is_frozen_inode());
update = false;
}
- _project_rstat_inode_to_frag(*curi, std::max(first, floor), cur->last, parent,
+ // hacky
+ const CInode::mempool_inode *pi;
+ if (update && cur->is_projected()) {
+ pi = cur->_get_projected_inode();
+ } else {
+ pi = cur->get_projected_inode().get();
+ if (update) {
+ // new inode
+ ceph_assert(pi->rstat == pi->accounted_rstat);
+ update = false;
+ }
+ }
+ _project_rstat_inode_to_frag(pi, std::max(first, floor), cur->last, parent,
linkunlink, update);
}
if (g_conf()->mds_snap_rstat) {
for (const auto &p : cur->dirty_old_rstats) {
- auto &old = cur->old_inodes[p];
+ const auto &old = cur->get_old_inodes()->at(p);
snapid_t ofirst = std::max(old.first, floor);
auto it = snaps.lower_bound(ofirst);
if (it == snaps.end() || *it > p)
continue;
if (p >= floor)
- _project_rstat_inode_to_frag(old.inode, ofirst, p, parent, 0, false);
+ _project_rstat_inode_to_frag(&old.inode, ofirst, p, parent, 0, false);
}
}
cur->dirty_old_rstats.clear();
}
-void MDCache::_project_rstat_inode_to_frag(CInode::mempool_inode& inode, snapid_t ofirst, snapid_t last,
+void MDCache::_project_rstat_inode_to_frag(const CInode::mempool_inode* inode, snapid_t ofirst, snapid_t last,
CDir *parent, int linkunlink, bool update_inode)
{
dout(10) << "_project_rstat_inode_to_frag [" << ofirst << "," << last << "]" << dendl;
- dout(20) << " inode rstat " << inode.rstat << dendl;
- dout(20) << " inode accounted_rstat " << inode.accounted_rstat << dendl;
+ dout(20) << " inode rstat " << inode->rstat << dendl;
+ dout(20) << " inode accounted_rstat " << inode->accounted_rstat << dendl;
nest_info_t delta;
if (linkunlink == 0) {
- delta.add(inode.rstat);
- delta.sub(inode.accounted_rstat);
+ delta.add(inode->rstat);
+ delta.sub(inode->accounted_rstat);
} else if (linkunlink < 0) {
- delta.sub(inode.accounted_rstat);
+ delta.sub(inode->accounted_rstat);
} else {
- delta.add(inode.rstat);
+ delta.add(inode->rstat);
}
dout(20) << " delta " << delta << dendl;
- if (update_inode)
- inode.accounted_rstat = inode.rstat;
while (last >= ofirst) {
/*
dout(20) << " project to [" << first << "," << last << "] " << *prstat << dendl;
ceph_assert(last >= first);
prstat->add(delta);
- if (update_inode)
- inode.accounted_rstat = inode.rstat;
dout(20) << " result [" << first << "," << last << "] " << *prstat << " " << *parent << dendl;
last = first-1;
}
+
+ if (update_inode) {
+ auto _inode = const_cast<CInode::mempool_inode*>(inode);
+ _inode->accounted_rstat = _inode->rstat;
+ }
}
void MDCache::project_rstat_frag_to_inode(nest_info_t& rstat, nest_info_t& accounted_rstat,
delta.sub(accounted_rstat);
dout(20) << " delta " << delta << dendl;
+ CInode::old_inode_map_ptr _old_inodes;
while (last >= ofirst) {
CInode::mempool_inode *pi;
snapid_t first;
if (last == pin->last) {
- pi = pin->get_projected_inode();
+ pi = pin->_get_projected_inode();
first = std::max(ofirst, pin->first);
if (first > pin->first) {
- auto &old = pin->cow_old_inode(first-1, cow_head);
+ auto& old = pin->cow_old_inode(first-1, cow_head);
dout(20) << " cloned old_inode rstat is " << old.inode.rstat << dendl;
}
} else {
+ if (!_old_inodes) {
+ _old_inodes = CInode::allocate_old_inode_map();
+ if (pin->is_any_old_inodes())
+ *_old_inodes = *pin->get_old_inodes();
+ }
if (last >= pin->first) {
first = pin->first;
pin->cow_old_inode(last, cow_head);
} else {
// our life is easier here because old_inodes is not sparse
// (although it may not begin at snapid 1)
- auto it = pin->old_inodes.lower_bound(last);
- if (it == pin->old_inodes.end()) {
+ auto it = _old_inodes->lower_bound(last);
+ if (it == _old_inodes->end()) {
dout(10) << " no old_inode <= " << last << ", done." << dendl;
break;
}
if (it->first > last) {
dout(10) << " splitting right old_inode [" << first << "," << it->first << "] to ["
<< (last+1) << "," << it->first << "]" << dendl;
- pin->old_inodes[last] = it->second;
+ (*_old_inodes)[last] = it->second;
it->second.first = last+1;
pin->dirty_old_rstats.insert(it->first);
}
if (first < ofirst) {
dout(10) << " splitting left old_inode [" << first << "," << last << "] to ["
<< first << "," << ofirst-1 << "]" << dendl;
- pin->old_inodes[ofirst-1] = pin->old_inodes[last];
+ (*_old_inodes)[ofirst-1] = (*_old_inodes)[last];
pin->dirty_old_rstats.insert(ofirst-1);
- pin->old_inodes[last].first = first = ofirst;
+ (*_old_inodes)[last].first = first = ofirst;
}
- pi = &pin->old_inodes[last].inode;
+ pi = &(*_old_inodes)[last].inode;
pin->dirty_old_rstats.insert(last);
}
dout(20) << " projecting to [" << first << "," << last << "] " << pi->rstat << dendl;
last = first-1;
}
+ if (_old_inodes)
+ pin->reset_old_inodes(std::move(_old_inodes));
}
void MDCache::broadcast_quota_to_client(CInode *in, client_t exclude_ct, bool quota_change)
if (!in->is_auth() || in->is_frozen())
return;
- auto i = in->get_projected_inode();
-
- if (!i->quota.is_enable() &&
- !quota_change)
+ const auto& pi = in->get_projected_inode();
+ if (!pi->quota.is_enable() && !quota_change)
return;
// creaete snaprealm for quota inode (quota was set before mimic)
if (exclude_ct >= 0 && exclude_ct != p.first)
goto update;
- if (cap->last_rbytes == i->rstat.rbytes &&
- cap->last_rsize == i->rstat.rsize())
+ if (cap->last_rbytes == pi->rstat.rbytes &&
+ cap->last_rsize == pi->rstat.rsize())
continue;
- if (i->quota.max_files > 0) {
- if (i->rstat.rsize() >= i->quota.max_files)
+ if (pi->quota.max_files > 0) {
+ if (pi->rstat.rsize() >= pi->quota.max_files)
goto update;
- if ((abs(cap->last_rsize - i->quota.max_files) >> 4) <
- abs(cap->last_rsize - i->rstat.rsize()))
+ if ((abs(cap->last_rsize - pi->quota.max_files) >> 4) <
+ abs(cap->last_rsize - pi->rstat.rsize()))
goto update;
}
- if (i->quota.max_bytes > 0) {
- if (i->rstat.rbytes > i->quota.max_bytes - (i->quota.max_bytes >> 3))
+ if (pi->quota.max_bytes > 0) {
+ if (pi->rstat.rbytes > pi->quota.max_bytes - (pi->quota.max_bytes >> 3))
goto update;
- if ((abs(cap->last_rbytes - i->quota.max_bytes) >> 4) <
- abs(cap->last_rbytes - i->rstat.rbytes))
+ if ((abs(cap->last_rbytes - pi->quota.max_bytes) >> 4) <
+ abs(cap->last_rbytes - pi->rstat.rbytes))
goto update;
}
continue;
update:
- cap->last_rsize = i->rstat.rsize();
- cap->last_rbytes = i->rstat.rbytes;
+ cap->last_rsize = pi->rstat.rsize();
+ cap->last_rbytes = pi->rstat.rbytes;
auto msg = make_message<MClientQuota>();
msg->ino = in->ino();
- msg->rstat = i->rstat;
- msg->quota = i->quota;
+ msg->rstat = pi->rstat;
+ msg->quota = pi->quota;
mds->send_message_client_counted(msg, cap->get_session());
}
for (const auto &it : in->get_replicas()) {
pin->pre_cow_old_inode(); // avoid cow mayhem!
- auto &pi = pin->project_inode();
- pi.inode.version = pin->pre_dirty();
+ auto pi = pin->project_inode();
+ pi.inode->version = pin->pre_dirty();
// dirstat
if (do_parent_mtime || linkunlink) {
dout(20) << "predirty_journal_parents add_delta " << pf->fragstat << dendl;
dout(20) << "predirty_journal_parents - " << pf->accounted_fragstat << dendl;
bool touched_mtime = false, touched_chattr = false;
- pi.inode.dirstat.add_delta(pf->fragstat, pf->accounted_fragstat, &touched_mtime, &touched_chattr);
+ pi.inode->dirstat.add_delta(pf->fragstat, pf->accounted_fragstat, &touched_mtime, &touched_chattr);
pf->accounted_fragstat = pf->fragstat;
if (touched_mtime)
- pi.inode.mtime = pi.inode.ctime = pi.inode.dirstat.mtime;
+ pi.inode->mtime = pi.inode->ctime = pi.inode->dirstat.mtime;
if (touched_chattr)
- pi.inode.change_attr = pi.inode.dirstat.change_attr;
- dout(20) << "predirty_journal_parents gives " << pi.inode.dirstat << " on " << *pin << dendl;
+ pi.inode->change_attr = pi.inode->dirstat.change_attr;
+ dout(20) << "predirty_journal_parents gives " << pi.inode->dirstat << " on " << *pin << dendl;
if (parent->get_frag() == frag_t()) { // i.e., we are the only frag
- if (pi.inode.dirstat.size() < 0)
+ if (pi.inode->dirstat.size() < 0)
ceph_assert(!"negative dirstat size" == g_conf()->mds_verify_scatter);
- if (pi.inode.dirstat.size() != pf->fragstat.size()) {
+ if (pi.inode->dirstat.size() != pf->fragstat.size()) {
mds->clog->error() << "unmatched fragstat size on single dirfrag "
- << parent->dirfrag() << ", inode has " << pi.inode.dirstat
+ << parent->dirfrag() << ", inode has " << pi.inode->dirstat
<< ", dirfrag has " << pf->fragstat;
// trust the dirfrag for now
- pi.inode.dirstat = pf->fragstat;
+ pi.inode->dirstat = pf->fragstat;
ceph_assert(!"unmatched fragstat size" == g_conf()->mds_verify_scatter);
}
pf->accounted_rstat = pf->rstat;
if (parent->get_frag() == frag_t()) { // i.e., we are the only frag
- if (pi.inode.rstat.rbytes != pf->rstat.rbytes) {
+ if (pi.inode->rstat.rbytes != pf->rstat.rbytes) {
mds->clog->error() << "unmatched rstat rbytes on single dirfrag "
- << parent->dirfrag() << ", inode has " << pi.inode.rstat
+ << parent->dirfrag() << ", inode has " << pi.inode->rstat
<< ", dirfrag has " << pf->rstat;
// trust the dirfrag for now
- pi.inode.rstat = pf->rstat;
+ pi.inode->rstat = pf->rstat;
ceph_assert(!"unmatched rstat rbytes" == g_conf()->mds_verify_scatter);
}
/* Remote strays with linkage (i.e. hardlinks) should not be
* expired, because they may be the target of
* a rename() as the owning MDS shuts down */
- if (!tin->is_stray() && tin->inode.nlink) {
+ if (!tin->is_stray() && tin->get_inode()->nlink) {
dout(10) << __func__ << ": stray still has linkage " << *tin << dendl;
return true;
}
CInode *MDCache::rejoin_invent_inode(inodeno_t ino, snapid_t last)
{
- CInode *in = new CInode(this, true, 1, last);
- in->inode.ino = ino;
+ CInode *in = new CInode(this, true, 2, last);
+ in->_get_inode()->ino = ino;
in->state_set(CInode::STATE_REJOINUNDEF);
add_inode(in);
rejoin_undef_inodes.insert(in);
in = rejoin_invent_inode(df.ino, CEPH_NOSNAP);
if (!in->is_dir()) {
ceph_assert(in->state_test(CInode::STATE_REJOINUNDEF));
- in->inode.mode = S_IFDIR;
- in->inode.dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash;
+ in->_get_inode()->mode = S_IFDIR;
+ in->_get_inode()->dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash;
}
CDir *dir = in->get_or_open_dirfrag(this, df.frag);
dir->state_set(CDir::STATE_REJOINUNDEF);
if (!diri) {
// barebones inode; the full inode loop below will clean up.
diri = new CInode(this, false);
- diri->inode.ino = p.first.ino;
- diri->inode.mode = S_IFDIR;
- diri->inode.dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash;
+ auto _inode = diri->_get_inode();
+ _inode->ino = p.first.ino;
+ _inode->mode = S_IFDIR;
+ _inode->dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash;
+
add_inode(diri);
if (MDS_INO_MDSDIR(from) == p.first.ino) {
diri->inode_auth = mds_authority_t(from, CDIR_AUTH_UNKNOWN);
dout(10) << " had bad linkage for " << *dn << dendl;
}
- // hmm, did we have the proper linkage here?
- if (dnl->is_null() && !q.second.is_null()) {
+ // hmm, did we have the proper linkage here?
+ if (dnl->is_null() && !q.second.is_null()) {
if (q.second.is_remote()) {
dn->dir->link_remote_inode(dn, q.second.remote_ino, q.second.remote_d_type);
} else {
if (!in) {
// barebones inode; assume it's dir, the full inode loop below will clean up.
in = new CInode(this, false, q.second.first, q.first.snapid);
- in->inode.ino = q.second.ino;
- in->inode.mode = S_IFDIR;
- in->inode.dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash;
+ auto _inode = in->_get_inode();
+ _inode->ino = q.second.ino;
+ _inode->mode = S_IFDIR;
+ _inode->dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash;
add_inode(in);
dout(10) << " add inode " << *in << dendl;
} else if (in->get_parent_dn()) {
dn->dir->link_primary_inode(dn, in);
isolated_inodes.erase(in);
}
- }
+ }
dn->set_replica_nonce(q.second.nonce);
dn->lock.set_state_rejoin(q.second.lock, rejoin_waiters, survivor);
if (in->last != CEPH_NOSNAP)
continue;
- if (in->is_auth() && !in->is_base() && in->inode.is_dirty_rstat())
+ if (in->is_auth() && !in->is_base() && in->get_inode()->is_dirty_rstat())
in->mark_dirty_rstat();
int dirty_caps = 0;
rejoin_undef_inodes.erase(in);
if (in->is_dir()) {
// FIXME: re-hash dentries if necessary
- ceph_assert(in->inode.dir_layout.dl_dir_hash == g_conf()->mds_default_dir_hash);
+ ceph_assert(in->get_inode()->dir_layout.dl_dir_hash == g_conf()->mds_default_dir_hash);
if (in->get_num_dirfrags() && !in->dirfragtree.is_leaf(frag_t())) {
CDir *dir = in->get_dirfrag(frag_t());
ceph_assert(dir);
}
bool recover = false;
- for (map<client_t,client_writeable_range_t>::iterator p = in->inode.client_ranges.begin();
- p != in->inode.client_ranges.end();
- ++p) {
- Capability *cap = in->get_client_cap(p->first);
+ for (auto& p : in->get_inode()->client_ranges) {
+ Capability *cap = in->get_client_cap(p.first);
if (cap) {
cap->mark_clientwriteable();
} else {
- dout(10) << " client." << p->first << " has range " << p->second << " but no cap on " << *in << dendl;
+ dout(10) << " client." << p.first << " has range " << p.second << " but no cap on " << *in << dendl;
recover = true;
break;
}
void MDCache::truncate_inode(CInode *in, LogSegment *ls)
{
- auto pi = in->get_projected_inode();
+ const auto& pi = in->get_projected_inode();
dout(10) << "truncate_inode "
<< pi->truncate_from << " -> " << pi->truncate_size
<< " on " << *in
void MDCache::_truncate_inode(CInode *in, LogSegment *ls)
{
- auto pi = &in->inode;
+ const auto& pi = in->get_inode();
dout(10) << "_truncate_inode "
<< pi->truncate_from << " -> " << pi->truncate_size
<< " on " << *in << dendl;
ceph_assert(in->last == CEPH_NOSNAP);
}
dout(10) << "_truncate_inode snapc " << snapc << " on " << *in << dendl;
- filer.truncate(in->inode.ino, &in->inode.layout, *snapc,
+ auto layout = pi->layout;
+ filer.truncate(in->ino(), &layout, *snapc,
pi->truncate_size, pi->truncate_from-pi->truncate_size,
pi->truncate_seq, ceph::real_time::min(), 0,
new C_OnFinisher(new C_IO_MDC_TruncateFinish(this, in, ls),
ls->truncating_inodes.erase(p);
// update
- auto &pi = in->project_inode();
- pi.inode.version = in->pre_dirty();
- pi.inode.truncate_from = 0;
- pi.inode.truncate_pending--;
+ auto pi = in->project_inode();
+ pi.inode->version = in->pre_dirty();
+ pi.inode->truncate_from = 0;
+ pi.inode->truncate_pending--;
MutationRef mut(new MutationImpl());
mut->ls = mds->mdlog->get_current_segment();
break;
CInode *diri = pdn->get_dir()->get_inode();
reply->ancestors.push_back(inode_backpointer_t(diri->ino(), pdn->get_name(),
- in->inode.version));
+ in->get_version()));
in = diri;
}
} else {
CDentry::linkage_t *dnl = dn->get_projected_linkage();
if (dnl->is_primary()) {
CInode *in = dnl->get_inode();
- if (in->inode.nlink == 0)
+ if (in->get_inode()->nlink == 0)
in->state_set(CInode::STATE_ORPHAN);
maybe_eval_stray(in);
}
void MDCache::encode_replica_inode(CInode *in, mds_rank_t to, bufferlist& bl,
uint64_t features)
{
- ENCODE_START(2, 1, bl);
ceph_assert(in->is_auth());
- encode(in->inode.ino, bl); // bleh, minor assymetry here
+
+ ENCODE_START(2, 1, bl);
+ encode(in->ino(), bl); // bleh, minor assymetry here
encode(in->last, bl);
__u32 nonce = in->add_replica(to);
decode(nonce, p);
in = get_inode(ino, last);
if (!in) {
- in = new CInode(this, false, 1, last);
+ in = new CInode(this, false, 2, last);
in->set_replica_nonce(nonce);
in->_decode_base(p);
in->_decode_locks_state_for_replica(p, true);
// dft lock
if (diri->is_auth()) {
// journal dirfragtree
- auto &pi = diri->project_inode();
- pi.inode.version = diri->pre_dirty();
+ auto pi = diri->project_inode();
+ pi.inode->version = diri->pre_dirty();
journal_dirty_inode(mdr.get(), &le->metablob, diri);
} else {
mds->locker->mark_updated_scatterlock(&diri->dirfragtreelock);
}
if (diri_auth) {
- auto &pi = diri->project_inode();
- pi.inode.version = diri->pre_dirty();
+ auto pi = diri->project_inode();
+ pi.inode->version = diri->pre_dirty();
diri->pop_and_dirty_projected_inode(ls); // hacky
le->metablob.add_primary_dentry(diri->get_projected_parent_dn(), diri, true);
} else {
}
}
- if (!dir_info.same_sums(diri->inode.dirstat) ||
- !nest_info.same_sums(diri->inode.rstat)) {
+ if (!dir_info.same_sums(diri->get_inode()->dirstat) ||
+ !nest_info.same_sums(diri->get_inode()->rstat)) {
dout(10) << __func__ << " failed to fix fragstat/rstat on "
<< *diri << dendl;
}
return;
// project_snaprealm() upgrades snaprealm format
- auto &pi = in->project_inode(false, true);
+ auto pi = in->project_inode(false, true);
mdr->add_projected_inode(in);
- pi.inode.version = in->pre_dirty();
+ pi.inode->version = in->pre_dirty();
mdr->ls = mds->mdlog->get_current_segment();
EUpdate *le = new EUpdate(mds->mdlog, "upgrade_snaprealm");
* away.
*/
void MDCache::maybe_eval_stray(CInode *in, bool delay) {
- if (in->inode.nlink > 0 || in->is_base() || is_readonly() ||
+ if (in->get_inode()->nlink > 0 || in->is_base() || is_readonly() ||
mds->get_state() <= MDSMap::STATE_REJOIN)
return;
void project_rstat_inode_to_frag(CInode *cur, CDir *parent, snapid_t first,
int linkunlink, SnapRealm *prealm);
- void _project_rstat_inode_to_frag(CInode::mempool_inode & inode, snapid_t ofirst, snapid_t last,
+ void _project_rstat_inode_to_frag(const CInode::mempool_inode* inode, snapid_t ofirst, snapid_t last,
CDir *parent, int linkunlink, bool update_inode);
void project_rstat_frag_to_inode(nest_info_t& rstat, nest_info_t& accounted_rstat,
snapid_t ofirst, snapid_t last,
dout(7) << *in << dendl;
ceph_assert(!in->is_replica(mds->get_nodeid()));
- encode(in->inode.ino, enc_state);
+ encode(in->ino(), enc_state);
encode(in->last, enc_state);
in->encode_export(enc_state);
in = cache->get_inode(ino, last);
if (!in) {
- in = new CInode(mds->mdcache, true, 1, last);
+ in = new CInode(mds->mdcache, true, 2, last);
added = true;
}
dout(10) << " had " << *in << dendl;
}
- if (in->inode.is_dirty_rstat())
+ if (in->get_inode()->is_dirty_rstat())
in->mark_dirty_rstat();
// clear if dirtyscattered, since we're going to journal this
void RecoveryQueue::_start(CInode *in)
{
- auto pi = in->get_projected_inode();
+ const auto& pi = in->get_projected_inode();
// blech
if (pi->client_ranges.size() && !pi->get_max_size()) {
auto p = file_recovering.find(in);
if (pi->client_ranges.size() && pi->get_max_size()) {
- dout(10) << "starting " << in->inode.size << " " << pi->client_ranges
+ dout(10) << "starting " << pi->size << " " << pi->client_ranges
<< " " << *in << dendl;
if (p == file_recovering.end()) {
file_recovering.insert(make_pair(in, false));
C_MDC_Recover *fin = new C_MDC_Recover(this, in);
- filer.probe(in->inode.ino, &in->inode.layout, in->last,
+ auto layout = pi->layout;
+ filer.probe(in->ino(), &layout, in->last,
pi->get_max_size(), &fin->size, &fin->mtime, false,
0, fin);
} else {
dout(10) << "already working on " << *in << ", set need_restart flag" << dendl;
}
} else {
- dout(10) << "skipping " << in->inode.size << " " << *in << dendl;
+ dout(10) << "skipping " << pi->size << " " << *in << dendl;
if (p == file_recovering.end()) {
in->state_clear(CInode::STATE_RECOVERING);
mds->locker->eval(in, CEPH_LOCK_IFILE);
{
// Record backtrace fails as remote linkage damage, as
// we may not be able to resolve hard links to this inode
- mdcache->mds->damage_table.notify_remote_damaged(in->inode.ino, path);
+ mdcache->mds->damage_table.notify_remote_damaged(in->ino(), path);
} else if (result.inode.checked && !result.inode.passed &&
!result.inode.repaired) {
// Record damaged inode structures as damaged dentries as
* create a new inode. set c/m/atime. hit dir pop.
*/
CInode* Server::prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino, unsigned mode,
- file_layout_t *layout)
+ const file_layout_t *layout)
{
CInode *in = new CInode(mdcache);
+ auto _inode = in->_get_inode();
// Server::prepare_force_open_sessions() can re-open session in closing
// state. In that corner case, session's prealloc_inos are being freed.
bool allow_prealloc_inos = mdr->session->is_open();
// assign ino
- if (allow_prealloc_inos && (mdr->used_prealloc_ino = in->inode.ino = mdr->session->take_ino(useino))) {
+ if (allow_prealloc_inos && (mdr->used_prealloc_ino = _inode->ino = mdr->session->take_ino(useino))) {
mds->sessionmap.mark_projected(mdr->session);
dout(10) << "prepare_new_inode used_prealloc " << mdr->used_prealloc_ino
<< " (" << mdr->session->info.prealloc_inos
<< dendl;
} else {
mdr->alloc_ino =
- in->inode.ino = mds->inotable->project_alloc_id(useino);
+ _inode->ino = mds->inotable->project_alloc_id(useino);
dout(10) << "prepare_new_inode alloc " << mdr->alloc_ino << dendl;
}
- if (useino && useino != in->inode.ino) {
- dout(0) << "WARNING: client specified " << useino << " and i allocated " << in->inode.ino << dendl;
+ if (useino && useino != _inode->ino) {
+ dout(0) << "WARNING: client specified " << useino << " and i allocated " << _inode->ino << dendl;
mds->clog->error() << mdr->client_request->get_source()
<< " specified ino " << useino
- << " but mds." << mds->get_nodeid() << " allocated " << in->inode.ino;
+ << " but mds." << mds->get_nodeid() << " allocated " << _inode->ino;
//ceph_abort(); // just for now.
}
dout(10) << "prepare_new_inode prealloc " << mdr->prealloc_inos << dendl;
}
- in->inode.version = 1;
- in->inode.xattr_version = 1;
- in->inode.nlink = 1; // FIXME
+ _inode->version = 1;
+ _inode->xattr_version = 1;
+ _inode->nlink = 1; // FIXME
- in->inode.mode = mode;
+ _inode->mode = mode;
// FIPS zeroization audit 20191117: this memset is not security related.
- memset(&in->inode.dir_layout, 0, sizeof(in->inode.dir_layout));
- if (in->inode.is_dir()) {
- in->inode.dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash;
+ memset(&_inode->dir_layout, 0, sizeof(_inode->dir_layout));
+ if (_inode->is_dir()) {
+ _inode->dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash;
} else if (layout) {
- in->inode.layout = *layout;
+ _inode->layout = *layout;
} else {
- in->inode.layout = mdcache->default_file_layout;
+ _inode->layout = mdcache->default_file_layout;
}
- in->inode.truncate_size = -1ull; // not truncated, yet!
- in->inode.truncate_seq = 1; /* starting with 1, 0 is kept for no-truncation logic */
+ _inode->truncate_size = -1ull; // not truncated, yet!
+ _inode->truncate_seq = 1; /* starting with 1, 0 is kept for no-truncation logic */
CInode *diri = dir->get_inode();
- dout(10) << oct << " dir mode 0" << diri->inode.mode << " new mode 0" << mode << dec << dendl;
+ dout(10) << oct << " dir mode 0" << diri->get_inode()->mode << " new mode 0" << mode << dec << dendl;
- if (diri->inode.mode & S_ISGID) {
+ if (diri->get_inode()->mode & S_ISGID) {
dout(10) << " dir is sticky" << dendl;
- in->inode.gid = diri->inode.gid;
+ _inode->gid = diri->get_inode()->gid;
if (S_ISDIR(mode)) {
dout(10) << " new dir also sticky" << dendl;
- in->inode.mode |= S_ISGID;
+ _inode->mode |= S_ISGID;
}
} else
- in->inode.gid = mdr->client_request->get_caller_gid();
+ _inode->gid = mdr->client_request->get_caller_gid();
- in->inode.uid = mdr->client_request->get_caller_uid();
+ _inode->uid = mdr->client_request->get_caller_uid();
- in->inode.btime = in->inode.ctime = in->inode.mtime = in->inode.atime =
+ _inode->btime = _inode->ctime = _inode->mtime = _inode->atime =
mdr->get_op_stamp();
- in->inode.change_attr = 0;
+ _inode->change_attr = 0;
const cref_t<MClientRequest> &req = mdr->client_request;
if (req->get_data().length()) {
auto p = req->get_data().cbegin();
// xattrs on new inode?
- CInode::mempool_xattr_map xattrs;
- decode_noshare(xattrs, p);
- for (const auto &p : xattrs) {
- dout(10) << "prepare_new_inode setting xattr " << p.first << dendl;
- auto em = in->xattrs.emplace(std::piecewise_construct, std::forward_as_tuple(p.first), std::forward_as_tuple(p.second));
- if (!em.second)
- em.first->second = p.second;
- }
+ auto _xattrs = CInode::allocate_xattr_map();
+ decode_noshare(*_xattrs, p);
+ dout(10) << "prepare_new_inode setting xattrs " << *_xattrs << dendl;
+ in->reset_xattrs(std::move(_xattrs));
}
if (!mds->mdsmap->get_inline_data_enabled() ||
!mdr->session->get_connection()->has_feature(CEPH_FEATURE_MDS_INLINE_DATA))
- in->inode.inline_data.version = CEPH_INLINE_NONE;
+ _inode->inline_data.version = CEPH_INLINE_NONE;
mdcache->add_inode(in); // add
dout(10) << "prepare_new_inode " << *in << dendl;
return;
}
- if (!cur->inode.is_file()) {
+ if (!cur->is_file()) {
// can only open non-regular inode with mode FILE_MODE_PIN, at least for now.
cmode = CEPH_FILE_MODE_PIN;
// the inode is symlink and client wants to follow it, ignore the O_TRUNC flag.
- if (cur->inode.is_symlink() && !(flags & CEPH_O_NOFOLLOW))
+ if (cur->is_symlink() && !(flags & CEPH_O_NOFOLLOW))
flags &= ~CEPH_O_TRUNC;
}
respond_to_request(mdr, -ENXIO); // FIXME what error do we want?
return;
}*/
- if ((flags & CEPH_O_DIRECTORY) && !cur->inode.is_dir() && !cur->inode.is_symlink()) {
+ if ((flags & CEPH_O_DIRECTORY) && !cur->is_dir() && !cur->is_symlink()) {
dout(7) << "specified O_DIRECTORY on non-directory " << *cur << dendl;
respond_to_request(mdr, -EINVAL);
return;
}
- if ((flags & CEPH_O_TRUNC) && !cur->inode.is_file()) {
+ if ((flags & CEPH_O_TRUNC) && !cur->is_file()) {
dout(7) << "specified O_TRUNC on !(file|symlink) " << *cur << dendl;
// we should return -EISDIR for directory, return -EINVAL for other non-regular
- respond_to_request(mdr, cur->inode.is_dir() ? -EISDIR : -EINVAL);
+ respond_to_request(mdr, cur->is_dir() ? -EISDIR : -EINVAL);
return;
}
- if (cur->inode.inline_data.version != CEPH_INLINE_NONE &&
+ if (cur->get_inode()->inline_data.version != CEPH_INLINE_NONE &&
!mdr->session->get_connection()->has_feature(CEPH_FEATURE_MDS_INLINE_DATA)) {
dout(7) << "old client cannot open inline data file " << *cur << dendl;
respond_to_request(mdr, -EPERM);
return;
// wait for pending truncate?
- const auto pi = cur->get_projected_inode();
+ const auto& pi = cur->get_projected_inode();
if (pi->is_truncating()) {
dout(10) << " waiting for pending truncate from " << pi->truncate_from
<< " to " << pi->truncate_size << " to complete on " << *cur << dendl;
dn->pop_projected_linkage();
// dirty inode, dn, dir
- newi->inode.version--; // a bit hacky, see C_MDS_mknod_finish
- newi->mark_dirty(newi->inode.version+1, mdr->ls);
+ newi->mark_dirty(mdr->ls);
newi->mark_dirty_parent(mdr->ls, true);
mdr->apply();
mds->locker->create_lock_cache(mdr, diri, &mdr->dir_layout);
// create inode.
- CInode *in = prepare_new_inode(mdr, dn->get_dir(), inodeno_t(req->head.ino),
- req->head.args.open.mode | S_IFREG, &layout);
- ceph_assert(in);
+ CInode *newi = prepare_new_inode(mdr, dn->get_dir(), inodeno_t(req->head.ino),
+ req->head.args.open.mode | S_IFREG, &layout);
+ ceph_assert(newi);
// it's a file.
- dn->push_projected_linkage(in);
+ dn->push_projected_linkage(newi);
- in->inode.version = dn->pre_dirty();
+ auto _inode = newi->_get_inode();
+ _inode->version = dn->pre_dirty();
if (layout.pool_id != mdcache->default_file_layout.pool_id)
- in->inode.add_old_pool(mdcache->default_file_layout.pool_id);
- in->inode.update_backtrace();
- in->inode.rstat.rfiles = 1;
+ _inode->add_old_pool(mdcache->default_file_layout.pool_id);
+ _inode->update_backtrace();
+ _inode->rstat.rfiles = 1;
+ _inode->accounted_rstat = _inode->rstat;
SnapRealm *realm = diri->find_snaprealm();
snapid_t follows = mdcache->get_global_snaprealm()->get_newest_seq();
ceph_assert(follows >= realm->get_newest_seq());
ceph_assert(dn->first == follows+1);
- in->first = dn->first;
+ newi->first = dn->first;
// do the open
- Capability *cap = mds->locker->issue_new_caps(in, cmode, mdr, realm);
- in->authlock.set_state(LOCK_EXCL);
- in->xattrlock.set_state(LOCK_EXCL);
+ Capability *cap = mds->locker->issue_new_caps(newi, cmode, mdr, realm);
+ newi->authlock.set_state(LOCK_EXCL);
+ newi->xattrlock.set_state(LOCK_EXCL);
if (cap && (cmode & CEPH_FILE_MODE_WR)) {
- in->inode.client_ranges[client].range.first = 0;
- in->inode.client_ranges[client].range.last = in->inode.layout.stripe_unit;
- in->inode.client_ranges[client].follows = follows;
+ _inode->client_ranges[client].range.first = 0;
+ _inode->client_ranges[client].range.last = _inode->layout.stripe_unit;
+ _inode->client_ranges[client].follows = follows;
cap->mark_clientwriteable();
}
mdlog->start_entry(le);
le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid());
journal_allocated_inos(mdr, &le->metablob);
- mdcache->predirty_journal_parents(mdr, &le->metablob, in, dn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, 1);
- le->metablob.add_primary_dentry(dn, in, true, true, true);
+ mdcache->predirty_journal_parents(mdr, &le->metablob, newi, dn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, 1);
+ le->metablob.add_primary_dentry(dn, newi, true, true, true);
// make sure this inode gets into the journal
- le->metablob.add_opened_ino(in->ino());
+ le->metablob.add_opened_ino(newi->ino());
- C_MDS_openc_finish *fin = new C_MDS_openc_finish(this, mdr, dn, in);
+ C_MDS_openc_finish *fin = new C_MDS_openc_finish(this, mdr, dn, newi);
if (mdr->session->info.has_feature(CEPHFS_FEATURE_DELEG_INO)) {
openc_response_t ocresp;
dout(10) << "adding created_ino and delegated_inos" << dendl;
- ocresp.created_ino = in->inode.ino;
+ ocresp.created_ino = _inode->ino;
if (delegate_inos_pct && !req->is_queued_for_replay()) {
// Try to delegate some prealloc_inos to the client, if it's down to half the max
} else if (mdr->client_request->get_connection()->has_feature(CEPH_FEATURE_REPLY_CREATE_INODE)) {
dout(10) << "adding ino to reply to indicate inode was created" << dendl;
// add the file created flag onto the reply if create_flags features is supported
- encode(in->inode.ino, mdr->reply_extra_bl);
+ encode(newi->ino(), mdr->reply_extra_bl);
}
- journal_and_reply(mdr, in, dn, le, fin);
+ journal_and_reply(mdr, newi, dn, le, fin);
// We hit_dir (via hit_inode) in our finish callback, but by then we might
// have overshot the split size (multiple opencs in flight), so here is
MDSRank *mds = get_mds();
// notify any clients
- if (truncating_smaller && in->inode.is_truncating()) {
+ if (truncating_smaller && in->get_inode()->is_truncating()) {
mds->locker->issue_truncate(in);
mds->mdcache->truncate_inode(in, mdr->ls);
}
if (!mds->locker->acquire_locks(mdr, lov))
return;
- if ((mask & CEPH_SETATTR_UID) && (cur->inode.uid != req->head.args.setattr.uid))
+ if ((mask & CEPH_SETATTR_UID) && (cur->get_inode()->uid != req->head.args.setattr.uid))
access_mask |= MAY_CHOWN;
- if ((mask & CEPH_SETATTR_GID) && (cur->inode.gid != req->head.args.setattr.gid))
+ if ((mask & CEPH_SETATTR_GID) && (cur->get_inode()->gid != req->head.args.setattr.gid))
access_mask |= MAY_CHGRP;
if (!check_access(mdr, cur, access_mask))
return;
// trunc from bigger -> smaller?
- auto pip = cur->get_projected_inode();
+ const auto& pip = cur->get_projected_inode();
uint64_t old_size = std::max<uint64_t>(pip->size, req->head.args.setattr.old_size);
EUpdate *le = new EUpdate(mdlog, "setattr");
mdlog->start_entry(le);
- auto &pi = cur->project_inode();
+ auto pi = cur->project_inode();
if (mask & CEPH_SETATTR_UID)
- pi.inode.uid = req->head.args.setattr.uid;
+ pi.inode->uid = req->head.args.setattr.uid;
if (mask & CEPH_SETATTR_GID)
- pi.inode.gid = req->head.args.setattr.gid;
+ pi.inode->gid = req->head.args.setattr.gid;
if (mask & CEPH_SETATTR_MODE)
- pi.inode.mode = (pi.inode.mode & ~07777) | (req->head.args.setattr.mode & 07777);
+ pi.inode->mode = (pi.inode->mode & ~07777) | (req->head.args.setattr.mode & 07777);
else if ((mask & (CEPH_SETATTR_UID|CEPH_SETATTR_GID|CEPH_SETATTR_KILL_SGUID)) &&
- S_ISREG(pi.inode.mode) &&
- (pi.inode.mode & (S_IXUSR|S_IXGRP|S_IXOTH))) {
- pi.inode.mode &= ~(S_ISUID|S_ISGID);
+ S_ISREG(pi.inode->mode) &&
+ (pi.inode->mode & (S_IXUSR|S_IXGRP|S_IXOTH))) {
+ pi.inode->mode &= ~(S_ISUID|S_ISGID);
}
if (mask & CEPH_SETATTR_MTIME)
- pi.inode.mtime = req->head.args.setattr.mtime;
+ pi.inode->mtime = req->head.args.setattr.mtime;
if (mask & CEPH_SETATTR_ATIME)
- pi.inode.atime = req->head.args.setattr.atime;
+ pi.inode->atime = req->head.args.setattr.atime;
if (mask & CEPH_SETATTR_BTIME)
- pi.inode.btime = req->head.args.setattr.btime;
+ pi.inode->btime = req->head.args.setattr.btime;
if (mask & (CEPH_SETATTR_ATIME | CEPH_SETATTR_MTIME | CEPH_SETATTR_BTIME))
- pi.inode.time_warp_seq++; // maybe not a timewarp, but still a serialization point.
+ pi.inode->time_warp_seq++; // maybe not a timewarp, but still a serialization point.
if (mask & CEPH_SETATTR_SIZE) {
if (truncating_smaller) {
- pi.inode.truncate(old_size, req->head.args.setattr.size);
+ pi.inode->truncate(old_size, req->head.args.setattr.size);
le->metablob.add_truncate_start(cur->ino());
} else {
- pi.inode.size = req->head.args.setattr.size;
- pi.inode.rstat.rbytes = pi.inode.size;
+ pi.inode->size = req->head.args.setattr.size;
+ pi.inode->rstat.rbytes = pi.inode->size;
}
- pi.inode.mtime = mdr->get_op_stamp();
+ pi.inode->mtime = mdr->get_op_stamp();
// adjust client's max_size?
CInode::mempool_inode::client_range_map new_ranges;
bool max_increased = false;
- mds->locker->calc_new_client_ranges(cur, pi.inode.size, true, &new_ranges, &max_increased);
- if (pi.inode.client_ranges != new_ranges) {
- dout(10) << " client_ranges " << pi.inode.client_ranges << " -> " << new_ranges << dendl;
- pi.inode.client_ranges = new_ranges;
+ mds->locker->calc_new_client_ranges(cur, pi.inode->size, true, &new_ranges, &max_increased);
+ if (pi.inode->client_ranges != new_ranges) {
+ dout(10) << " client_ranges " << pi.inode->client_ranges << " -> " << new_ranges << dendl;
+ pi.inode->client_ranges = new_ranges;
changed_ranges = true;
}
}
- pi.inode.version = cur->pre_dirty();
- pi.inode.ctime = mdr->get_op_stamp();
- if (mdr->get_op_stamp() > pi.inode.rstat.rctime)
- pi.inode.rstat.rctime = mdr->get_op_stamp();
- pi.inode.change_attr++;
+ pi.inode->version = cur->pre_dirty();
+ pi.inode->ctime = mdr->get_op_stamp();
+ if (mdr->get_op_stamp() > pi.inode->rstat.rctime)
+ pi.inode->rstat.rctime = mdr->get_op_stamp();
+ pi.inode->change_attr++;
// log + wait
le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid());
mdlog->start_entry(le);
// prepare
- auto &pi = in->project_inode();
- pi.inode.version = in->pre_dirty();
- pi.inode.mtime = pi.inode.ctime = mdr->get_op_stamp();
- if (mdr->get_op_stamp() > pi.inode.rstat.rctime)
- pi.inode.rstat.rctime = mdr->get_op_stamp();
- pi.inode.change_attr++;
-
- uint64_t old_size = std::max<uint64_t>(pi.inode.size, mdr->client_request->head.args.open.old_size);
+ auto pi = in->project_inode();
+ pi.inode->version = in->pre_dirty();
+ pi.inode->mtime = pi.inode->ctime = mdr->get_op_stamp();
+ if (mdr->get_op_stamp() > pi.inode->rstat.rctime)
+ pi.inode->rstat.rctime = mdr->get_op_stamp();
+ pi.inode->change_attr++;
+
+ uint64_t old_size = std::max<uint64_t>(pi.inode->size, mdr->client_request->head.args.open.old_size);
if (old_size > 0) {
- pi.inode.truncate(old_size, 0);
+ pi.inode->truncate(old_size, 0);
le->metablob.add_truncate_start(in->ino());
}
bool changed_ranges = false;
if (cap && (cmode & CEPH_FILE_MODE_WR)) {
- pi.inode.client_ranges[client].range.first = 0;
- pi.inode.client_ranges[client].range.last = pi.inode.get_layout_size_increment();
- pi.inode.client_ranges[client].follows = realm->get_newest_seq();
+ pi.inode->client_ranges[client].range.first = 0;
+ pi.inode->client_ranges[client].range.last = pi.inode->get_layout_size_increment();
+ pi.inode->client_ranges[client].follows = realm->get_newest_seq();
changed_ranges = true;
cap->mark_clientwriteable();
}
return;
// project update
- auto &pi = cur->project_inode();
- pi.inode.layout = layout;
+ auto pi = cur->project_inode();
+ pi.inode->layout = layout;
// add the old pool to the inode
- pi.inode.add_old_pool(old_layout.pool_id);
- pi.inode.version = cur->pre_dirty();
- pi.inode.ctime = mdr->get_op_stamp();
- if (mdr->get_op_stamp() > pi.inode.rstat.rctime)
- pi.inode.rstat.rctime = mdr->get_op_stamp();
- pi.inode.change_attr++;
+ pi.inode->add_old_pool(old_layout.pool_id);
+ pi.inode->version = cur->pre_dirty();
+ pi.inode->ctime = mdr->get_op_stamp();
+ if (mdr->get_op_stamp() > pi.inode->rstat.rctime)
+ pi.inode->rstat.rctime = mdr->get_op_stamp();
+ pi.inode->change_attr++;
// log + wait
mdr->ls = mdlog->get_current_segment();
return;
// validate layout
- const auto old_pi = cur->get_projected_inode();
+ const auto& old_pi = cur->get_projected_inode();
file_layout_t layout;
if (old_pi->has_layout())
layout = old_pi->layout;
if (!check_access(mdr, cur, access))
return;
- auto &pi = cur->project_inode();
- pi.inode.layout = layout;
- pi.inode.version = cur->pre_dirty();
+ auto pi = cur->project_inode();
+ pi.inode->layout = layout;
+ pi.inode->version = cur->pre_dirty();
// log + wait
mdr->ls = mdlog->get_current_segment();
if (check_layout_vxattr(mdr, rest, value, &layout) < 0)
return;
- auto &pi = cur->project_inode();
- pi.inode.layout = layout;
+ auto pi = cur->project_inode();
+ pi.inode->layout = layout;
mdr->no_early_reply = true;
- pip = &pi.inode;
+ pip = pi.inode.get();
} else if (name.compare(0, 16, "ceph.file.layout") == 0) {
if (!cur->is_file()) {
respond_to_request(mdr, -EINVAL);
if (!mds->locker->acquire_locks(mdr, lov))
return;
- auto &pi = cur->project_inode();
- int64_t old_pool = pi.inode.layout.pool_id;
- pi.inode.add_old_pool(old_pool);
- pi.inode.layout = layout;
- pip = &pi.inode;
+ auto pi = cur->project_inode();
+ int64_t old_pool = pi.inode->layout.pool_id;
+ pi.inode->add_old_pool(old_pool);
+ pi.inode->layout = layout;
+ pip = pi.inode.get();
} else if (name.compare(0, 10, "ceph.quota") == 0) {
if (!cur->is_dir() || cur->is_root()) {
respond_to_request(mdr, -EINVAL);
if (!xlock_policylock(mdr, cur, false, new_realm))
return;
- auto &pi = cur->project_inode(false, new_realm);
- pi.inode.quota = quota;
+ auto pi = cur->project_inode(false, new_realm);
+ pi.inode->quota = quota;
if (new_realm) {
SnapRealm *realm = cur->find_snaprealm();
newsnap.seq = seq;
}
mdr->no_early_reply = true;
- pip = &pi.inode;
+ pip = pi.inode.get();
client_t exclude_ct = mdr->get_client();
mdcache->broadcast_quota_to_client(cur, exclude_ct, true);
if (!xlock_policylock(mdr, cur))
return;
- auto &pi = cur->project_inode();
+ auto pi = cur->project_inode();
cur->set_export_pin(rank);
- pip = &pi.inode;
+ pip = pi.inode.get();
} else if (name == "ceph.dir.pin.random"sv) {
if (!cur->is_dir() || cur->is_root()) {
respond_to_request(mdr, -EINVAL);
if (!xlock_policylock(mdr, cur))
return;
- auto &pi = cur->project_inode();
+ auto pi = cur->project_inode();
cur->setxattr_ephemeral_rand(val);
- pip = &pi.inode;
+ pip = pi.inode.get();
} else if (name == "ceph.dir.pin.distributed"sv) {
if (!cur->is_dir() || cur->is_root()) {
respond_to_request(mdr, -EINVAL);
if (!xlock_policylock(mdr, cur))
return;
- auto &pi = cur->project_inode();
+ auto pi = cur->project_inode();
cur->setxattr_ephemeral_dist(val);
- pip = &pi.inode;
+ pip = pi.inode.get();
} else {
dout(10) << " unknown vxattr " << name << dendl;
respond_to_request(mdr, -EINVAL);
if (!mds->locker->acquire_locks(mdr, lov))
return;
- auto &pi = cur->project_inode();
- pi.inode.clear_layout();
- pi.inode.version = cur->pre_dirty();
+ auto pi = cur->project_inode();
+ pi.inode->clear_layout();
+ pi.inode->version = cur->pre_dirty();
// log + wait
mdr->ls = mdlog->get_current_segment();
if (!check_access(mdr, cur, MAY_WRITE))
return;
- auto pxattrs = cur->get_projected_xattrs();
size_t len = req->get_data().length();
size_t inc = len + name.length();
- // check xattrs kv pairs size
- size_t cur_xattrs_size = 0;
- for (const auto& p : *pxattrs) {
- if ((flags & CEPH_XATTR_REPLACE) && (name.compare(p.first) == 0)) {
- continue;
+ const auto& pxattrs = cur->get_projected_xattrs();
+ if (pxattrs) {
+ // check xattrs kv pairs size
+ size_t cur_xattrs_size = 0;
+ for (const auto& p : *pxattrs) {
+ if ((flags & CEPH_XATTR_REPLACE) && name.compare(p.first) == 0) {
+ continue;
+ }
+ cur_xattrs_size += p.first.length() + p.second.length();
}
- cur_xattrs_size += p.first.length() + p.second.length();
- }
- if (((cur_xattrs_size + inc) > g_conf()->mds_max_xattr_pairs_size)) {
- dout(10) << "xattr kv pairs size too big. cur_xattrs_size "
- << cur_xattrs_size << ", inc " << inc << dendl;
- respond_to_request(mdr, -ENOSPC);
- return;
- }
+ if (((cur_xattrs_size + inc) > g_conf()->mds_max_xattr_pairs_size)) {
+ dout(10) << "xattr kv pairs size too big. cur_xattrs_size "
+ << cur_xattrs_size << ", inc " << inc << dendl;
+ respond_to_request(mdr, -ENOSPC);
+ return;
+ }
- if ((flags & CEPH_XATTR_CREATE) && pxattrs->count(mempool::mds_co::string(name))) {
- dout(10) << "setxattr '" << name << "' XATTR_CREATE and EEXIST on " << *cur << dendl;
- respond_to_request(mdr, -EEXIST);
- return;
+ if ((flags & CEPH_XATTR_CREATE) && pxattrs->count(mempool::mds_co::string(name))) {
+ dout(10) << "setxattr '" << name << "' XATTR_CREATE and EEXIST on " << *cur << dendl;
+ respond_to_request(mdr, -EEXIST);
+ return;
+ }
}
- if ((flags & CEPH_XATTR_REPLACE) && !pxattrs->count(mempool::mds_co::string(name))) {
+
+ if ((flags & CEPH_XATTR_REPLACE) &&
+ !(pxattrs && pxattrs->count(mempool::mds_co::string(name)))) {
dout(10) << "setxattr '" << name << "' XATTR_REPLACE and ENODATA on " << *cur << dendl;
respond_to_request(mdr, -ENODATA);
return;
dout(10) << "setxattr '" << name << "' len " << len << " on " << *cur << dendl;
// project update
- auto &pi = cur->project_inode(true);
- pi.inode.version = cur->pre_dirty();
- pi.inode.ctime = mdr->get_op_stamp();
- if (mdr->get_op_stamp() > pi.inode.rstat.rctime)
- pi.inode.rstat.rctime = mdr->get_op_stamp();
- pi.inode.change_attr++;
- pi.inode.xattr_version++;
- auto &px = *pi.xattrs;
+ auto pi = cur->project_inode(true);
+ pi.inode->version = cur->pre_dirty();
+ pi.inode->ctime = mdr->get_op_stamp();
+ if (mdr->get_op_stamp() > pi.inode->rstat.rctime)
+ pi.inode->rstat.rctime = mdr->get_op_stamp();
+ pi.inode->change_attr++;
+ pi.inode->xattr_version++;
if ((flags & CEPH_XATTR_REMOVE)) {
- px.erase(mempool::mds_co::string(name));
+ pi.xattrs->erase(mempool::mds_co::string(name));
} else {
bufferptr b = buffer::create(len);
if (len)
req->get_data().begin().copy(len, b.c_str());
- auto em = px.emplace(std::piecewise_construct, std::forward_as_tuple(mempool::mds_co::string(name)), std::forward_as_tuple(b));
+ auto em = pi.xattrs->emplace(std::piecewise_construct, std::forward_as_tuple(mempool::mds_co::string(name)), std::forward_as_tuple(b));
if (!em.second)
em.first->second = b;
}
if (!mds->locker->acquire_locks(mdr, lov))
return;
- auto pxattrs = cur->get_projected_xattrs();
- if (pxattrs->count(mempool::mds_co::string(name)) == 0) {
+ const auto& pxattrs = cur->get_projected_xattrs();
+ if (pxattrs && pxattrs->count(mempool::mds_co::string(name)) == 0) {
dout(10) << "removexattr '" << name << "' and ENODATA on " << *cur << dendl;
respond_to_request(mdr, -ENODATA);
return;
dout(10) << "removexattr '" << name << "' on " << *cur << dendl;
// project update
- auto &pi = cur->project_inode(true);
+ auto pi = cur->project_inode(true);
auto &px = *pi.xattrs;
- pi.inode.version = cur->pre_dirty();
- pi.inode.ctime = mdr->get_op_stamp();
- if (mdr->get_op_stamp() > pi.inode.rstat.rctime)
- pi.inode.rstat.rctime = mdr->get_op_stamp();
- pi.inode.change_attr++;
- pi.inode.xattr_version++;
+ pi.inode->version = cur->pre_dirty();
+ pi.inode->ctime = mdr->get_op_stamp();
+ if (mdr->get_op_stamp() > pi.inode->rstat.rctime)
+ pi.inode->rstat.rctime = mdr->get_op_stamp();
+ pi.inode->change_attr++;
+ pi.inode->xattr_version++;
px.erase(mempool::mds_co::string(name));
// log + wait
// be a bit hacky with the inode version, here.. we decrement it
// just to keep mark_dirty() happen. (we didn't bother projecting
// a new version of hte inode since it's just been created)
- newi->inode.version--;
- newi->mark_dirty(newi->inode.version + 1, mdr->ls);
+ newi->mark_dirty(mdr->ls);
newi->mark_dirty_parent(mdr->ls, true);
// mkdir?
- if (newi->inode.is_dir()) {
+ if (newi->is_dir()) {
CDir *dir = newi->get_dirfrag(frag_t());
ceph_assert(dir);
dir->fnode.version--;
MDRequestRef null_ref;
get_mds()->mdcache->send_dentry_link(dn, null_ref);
- if (newi->inode.is_file()) {
+ if (newi->is_file()) {
get_mds()->locker->share_inode_max_size(newi);
- } else if (newi->inode.is_dir()) {
+ } else if (newi->is_dir()) {
// We do this now so that the linkages on the new directory are stable.
newi->maybe_ephemeral_dist();
newi->maybe_ephemeral_rand(true);
dn->push_projected_linkage(newi);
- newi->inode.rdev = req->head.args.mknod.rdev;
- newi->inode.version = dn->pre_dirty();
- newi->inode.rstat.rfiles = 1;
+ auto _inode = newi->_get_inode();
+ _inode->version = dn->pre_dirty();
+ _inode->rdev = req->head.args.mknod.rdev;
+ _inode->rstat.rfiles = 1;
+ _inode->accounted_rstat = _inode->rstat;
if (layout.pool_id != mdcache->default_file_layout.pool_id)
- newi->inode.add_old_pool(mdcache->default_file_layout.pool_id);
- newi->inode.update_backtrace();
+ _inode->add_old_pool(mdcache->default_file_layout.pool_id);
+ _inode->update_backtrace();
snapid_t follows = mdcache->get_global_snaprealm()->get_newest_seq();
SnapRealm *realm = dn->get_dir()->inode->find_snaprealm();
// if the client created a _regular_ file via MKNOD, it's highly likely they'll
// want to write to it (e.g., if they are reexporting NFS)
- if (S_ISREG(newi->inode.mode)) {
+ if (S_ISREG(_inode->mode)) {
// issue a cap on the file
int cmode = CEPH_FILE_MODE_RDWR;
Capability *cap = mds->locker->issue_new_caps(newi, cmode, mdr, realm);
newi->xattrlock.set_state(LOCK_EXCL);
dout(15) << " setting a client_range too, since this is a regular file" << dendl;
- newi->inode.client_ranges[client].range.first = 0;
- newi->inode.client_ranges[client].range.last = newi->inode.layout.stripe_unit;
- newi->inode.client_ranges[client].follows = follows;
+ _inode->client_ranges[client].range.first = 0;
+ _inode->client_ranges[client].range.last = _inode->layout.stripe_unit;
+ _inode->client_ranges[client].follows = follows;
cap->mark_clientwriteable();
}
}
ceph_assert(dn->first == follows + 1);
newi->first = dn->first;
- dout(10) << "mknod mode " << newi->inode.mode << " rdev " << newi->inode.rdev << dendl;
+ dout(10) << "mknod mode " << _inode->mode << " rdev " << _inode->rdev << dendl;
// prepare finisher
mdr->ls = mdlog->get_current_segment();
// it's a directory.
dn->push_projected_linkage(newi);
- newi->inode.version = dn->pre_dirty();
- newi->inode.rstat.rsubdirs = 1;
- newi->inode.update_backtrace();
+ auto _inode = newi->_get_inode();
+ _inode->version = dn->pre_dirty();
+ _inode->rstat.rsubdirs = 1;
+ _inode->accounted_rstat = _inode->rstat;
+ _inode->update_backtrace();
snapid_t follows = mdcache->get_global_snaprealm()->get_newest_seq();
SnapRealm *realm = dn->get_dir()->inode->find_snaprealm();
dn->push_projected_linkage(newi);
newi->symlink = req->get_path2();
- newi->inode.size = newi->symlink.length();
- newi->inode.rstat.rbytes = newi->inode.size;
- newi->inode.rstat.rfiles = 1;
- newi->inode.version = dn->pre_dirty();
- newi->inode.update_backtrace();
+ auto _inode = newi->_get_inode();
+ _inode->version = dn->pre_dirty();
+ _inode->size = newi->symlink.length();
+ _inode->rstat.rbytes = _inode->size;
+ _inode->rstat.rfiles = 1;
+ _inode->accounted_rstat = _inode->rstat;
+ _inode->update_backtrace();
newi->first = dn->first;
version_t tipv = targeti->pre_dirty();
// project inode update
- auto &pi = targeti->project_inode();
- pi.inode.nlink++;
- pi.inode.ctime = mdr->get_op_stamp();
- if (mdr->get_op_stamp() > pi.inode.rstat.rctime)
- pi.inode.rstat.rctime = mdr->get_op_stamp();
- pi.inode.change_attr++;
- pi.inode.version = tipv;
+ auto pi = targeti->project_inode();
+ pi.inode->nlink++;
+ pi.inode->ctime = mdr->get_op_stamp();
+ if (mdr->get_op_stamp() > pi.inode->rstat.rctime)
+ pi.inode->rstat.rctime = mdr->get_op_stamp();
+ pi.inode->change_attr++;
+ pi.inode->version = tipv;
bool adjust_realm = false;
if (!targeti->is_projected_snaprealm_global()) {
EPeerUpdate::OP_PREPARE, EPeerUpdate::LINK);
mdlog->start_entry(le);
- auto &pi = dnl->get_inode()->project_inode();
+ auto pi = dnl->get_inode()->project_inode();
// update journaled target inode
bool inc;
bool realm_projected = false;
if (mdr->peer_request->get_op() == MMDSPeerRequest::OP_LINKPREP) {
inc = true;
- pi.inode.nlink++;
+ pi.inode->nlink++;
if (!targeti->is_projected_snaprealm_global()) {
sr_t *newsnap = targeti->project_snaprealm();
targeti->mark_snaprealm_global(newsnap);
}
} else {
inc = false;
- pi.inode.nlink--;
+ pi.inode->nlink--;
if (targeti->is_projected_snaprealm_global()) {
ceph_assert(mdr->peer_request->desti_snapbl.length());
auto p = mdr->peer_request->desti_snapbl.cbegin();
sr_t *newsnap = targeti->project_snaprealm();
decode(*newsnap, p);
- if (pi.inode.nlink == 0)
+ if (pi.inode->nlink == 0)
ceph_assert(!newsnap->is_parent_global());
realm_projected = true;
link_rollback rollback;
rollback.reqid = mdr->reqid;
rollback.ino = targeti->ino();
- rollback.old_ctime = targeti->inode.ctime; // we hold versionlock xlock; no concorrent projections
+ rollback.old_ctime = targeti->get_inode()->ctime; // we hold versionlock xlock; no concorrent projections
const fnode_t *pf = targeti->get_parent_dn()->get_dir()->get_projected_fnode();
rollback.old_dir_mtime = pf->fragstat.mtime;
rollback.old_dir_rctime = pf->rstat.rctime;
encode(rollback, le->rollback);
mdr->more()->rollback_bl = le->rollback;
- pi.inode.ctime = mdr->get_op_stamp();
- pi.inode.version = targeti->pre_dirty();
+ pi.inode->ctime = mdr->get_op_stamp();
+ pi.inode->version = targeti->pre_dirty();
- dout(10) << " projected inode " << pi.inode.ino << " v " << pi.inode.version << dendl;
+ dout(10) << " projected inode " << pi.inode->ino << " v " << pi.inode->version << dendl;
// commit case
mdcache->predirty_journal_parents(mdr, &le->commit, dnl->get_inode(), 0, PREDIRTY_SHALLOW|PREDIRTY_PRIMARY);
dout(10) << " target is " << *in << dendl;
ceph_assert(!in->is_projected()); // live peer request hold versionlock xlock.
- auto &pi = in->project_inode();
- pi.inode.version = in->pre_dirty();
+ auto pi = in->project_inode();
+ pi.inode->version = in->pre_dirty();
mut->add_projected_inode(in);
// parent dir rctime
fnode_t *pf = parent->project_fnode();
mut->add_projected_fnode(parent);
pf->version = parent->pre_dirty();
- if (pf->fragstat.mtime == pi.inode.ctime) {
+ if (pf->fragstat.mtime == pi.inode->ctime) {
pf->fragstat.mtime = rollback.old_dir_mtime;
- if (pf->rstat.rctime == pi.inode.ctime)
+ if (pf->rstat.rctime == pi.inode->ctime)
pf->rstat.rctime = rollback.old_dir_rctime;
mut->add_updated_lock(&parent->get_inode()->filelock);
mut->add_updated_lock(&parent->get_inode()->nestlock);
}
// inode
- pi.inode.ctime = rollback.old_ctime;
+ pi.inode->ctime = rollback.old_ctime;
if (rollback.was_inc)
- pi.inode.nlink--;
+ pi.inode->nlink--;
else
- pi.inode.nlink++;
+ pi.inode->nlink++;
map<client_t,ref_t<MClientSnap>> splits;
if (rollback.snapbl.length() && in->snaprealm) {
// the unlinked dentry
dn->pre_dirty();
- auto &pi = in->project_inode();
+ auto pi = in->project_inode();
{
std::string t;
dn->make_path_string(t, true);
- pi.inode.stray_prior_path = std::move(t);
- }
- pi.inode.version = in->pre_dirty();
- pi.inode.ctime = mdr->get_op_stamp();
- if (mdr->get_op_stamp() > pi.inode.rstat.rctime)
- pi.inode.rstat.rctime = mdr->get_op_stamp();
- pi.inode.change_attr++;
- pi.inode.nlink--;
- if (pi.inode.nlink == 0)
+ pi.inode->stray_prior_path = std::move(t);
+ }
+ pi.inode->version = in->pre_dirty();
+ pi.inode->ctime = mdr->get_op_stamp();
+ if (mdr->get_op_stamp() > pi.inode->rstat.rctime)
+ pi.inode->rstat.rctime = mdr->get_op_stamp();
+ pi.inode->change_attr++;
+ pi.inode->nlink--;
+ if (pi.inode->nlink == 0)
in->state_set(CInode::STATE_ORPHAN);
if (mdr->more()->desti_srnode) {
mdcache->predirty_journal_parents(mdr, &le->metablob, in, dn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, -1);
mdcache->predirty_journal_parents(mdr, &le->metablob, in, straydn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, 1);
- pi.inode.update_backtrace();
+ pi.inode->update_backtrace();
le->metablob.add_primary_dentry(straydn, in, true, true);
} else {
mdr->add_projected_inode(in);
ceph_assert(straydn); // moving to straydn.
// link--, and move.
if (destdn->is_auth()) {
- auto &pi= oldin->project_inode(); //project_snaprealm
- pi.inode.version = straydn->pre_dirty(pi.inode.version);
- pi.inode.update_backtrace();
- tpi = &pi.inode;
+ auto pi= oldin->project_inode(); //project_snaprealm
+ pi.inode->version = straydn->pre_dirty(pi.inode->version);
+ pi.inode->update_backtrace();
+ tpi = pi.inode.get();
}
straydn->push_projected_linkage(oldin);
} else if (destdnl->is_remote()) {
// nlink-- targeti
if (oldin->is_auth()) {
- auto &pi = oldin->project_inode();
- pi.inode.version = oldin->pre_dirty();
- tpi = &pi.inode;
+ auto pi = oldin->project_inode();
+ pi.inode->version = oldin->pre_dirty();
+ tpi = pi.inode.get();
}
}
}
destdn->push_projected_linkage(srcdnl->get_remote_ino(), srcdnl->get_remote_d_type());
// srci
if (srci->is_auth()) {
- auto &pi = srci->project_inode();
- pi.inode.version = srci->pre_dirty();
- spi = &pi.inode;
+ auto pi = srci->project_inode();
+ pi.inode->version = srci->pre_dirty();
+ spi = pi.inode.get();
}
} else {
dout(10) << " will merge remote onto primary link" << dendl;
if (destdn->is_auth()) {
- auto &pi = oldin->project_inode();
- pi.inode.version = mdr->more()->pvmap[destdn] = destdn->pre_dirty(oldin->inode.version);
- spi = &pi.inode;
+ auto pi = oldin->project_inode();
+ pi.inode->version = mdr->more()->pvmap[destdn] = destdn->pre_dirty(oldin->get_version());
+ spi = pi.inode.get();
}
}
} else { // primary
dout(10) << " noting renamed dir open frags " << metablob->renamed_dir_frags << dendl;
}
}
- auto &pi = srci->project_inode(); // project snaprealm if srcdnl->is_primary
+ auto pi = srci->project_inode(); // project snaprealm if srcdnl->is_primary
// & srcdnl->snaprealm
- pi.inode.version = mdr->more()->pvmap[destdn] = destdn->pre_dirty(oldpv);
- pi.inode.update_backtrace();
- spi = &pi.inode;
+ pi.inode->version = mdr->more()->pvmap[destdn] = destdn->pre_dirty(oldpv);
+ pi.inode->update_backtrace();
+ spi = pi.inode.get();
}
destdn->push_projected_linkage(srci);
}
encode(exported_client_map, reply->inode_export, mds->mdsmap->get_up_features());
encode(exported_client_metadata_map, reply->inode_export);
reply->inode_export.claim_append(inodebl);
- reply->inode_export_v = srcdnl->get_inode()->inode.version;
+ reply->inode_export_v = srcdnl->get_inode()->get_version();
}
// remove mdr auth pin
if (in) {
bool projected;
if (in->get_projected_parent_dn()->authority().first == whoami) {
- auto &pi = in->project_inode();
- pip = &pi.inode;
+ auto pi = in->project_inode();
+ pip = pi.inode.get();
mut->add_projected_inode(in);
pip->version = in->pre_dirty();
projected = true;
} else {
- pip = in->get_projected_inode();
+ // FIXME: pip = in->get_projected_inode();
projected = false;
}
if (pip->ctime == rollback.ctime)
bool projected;
CInode::mempool_inode *ti = nullptr;
if (target->get_projected_parent_dn()->authority().first == whoami) {
- auto &pi = target->project_inode();
- ti = &pi.inode;
+ auto pi = target->project_inode();
+ ti = pi.inode.get();
mut->add_projected_inode(target);
ti->version = target->pre_dirty();
projected = true;
} else {
- ti = target->get_projected_inode();
+ //FIXME: ti = target->get_projected_inode();
projected = false;
}
if (ti->ctime == rollback.ctime)
info.name = snapname;
info.stamp = mdr->get_op_stamp();
- auto &pi = diri->project_inode(false, true);
- pi.inode.ctime = info.stamp;
- if (info.stamp > pi.inode.rstat.rctime)
- pi.inode.rstat.rctime = info.stamp;
- pi.inode.rstat.rsnaps++;
- pi.inode.version = diri->pre_dirty();
+ auto pi = diri->project_inode(false, true);
+ pi.inode->ctime = info.stamp;
+ if (info.stamp > pi.inode->rstat.rctime)
+ pi.inode->rstat.rctime = info.stamp;
+ pi.inode->rstat.rsnaps++;
+ pi.inode->version = diri->pre_dirty();
// project the snaprealm
auto &newsnap = *pi.snapnode;
ceph_assert(mds->snapclient->get_cached_version() >= stid);
// journal
- auto &pi = diri->project_inode(false, true);
- pi.inode.version = diri->pre_dirty();
- pi.inode.ctime = mdr->get_op_stamp();
- if (mdr->get_op_stamp() > pi.inode.rstat.rctime)
- pi.inode.rstat.rctime = mdr->get_op_stamp();
- pi.inode.rstat.rsnaps--;
+ auto pi = diri->project_inode(false, true);
+ pi.inode->version = diri->pre_dirty();
+ pi.inode->ctime = mdr->get_op_stamp();
+ if (mdr->get_op_stamp() > pi.inode->rstat.rctime)
+ pi.inode->rstat.rctime = mdr->get_op_stamp();
+ pi.inode->rstat.rsnaps--;
mdr->ls = mdlog->get_current_segment();
EUpdate *le = new EUpdate(mdlog, "rmsnap");
ceph_assert(mds->snapclient->get_cached_version() >= stid);
// journal
- auto &pi = diri->project_inode(false, true);
- pi.inode.ctime = mdr->get_op_stamp();
- if (mdr->get_op_stamp() > pi.inode.rstat.rctime)
- pi.inode.rstat.rctime = mdr->get_op_stamp();
- pi.inode.version = diri->pre_dirty();
+ auto pi = diri->project_inode(false, true);
+ pi.inode->ctime = mdr->get_op_stamp();
+ if (mdr->get_op_stamp() > pi.inode->rstat.rctime)
+ pi.inode->rstat.rctime = mdr->get_op_stamp();
+ pi.inode->version = diri->pre_dirty();
// project the snaprealm
auto &newsnap = *pi.snapnode;
bool _check_access(Session *session, CInode *in, unsigned mask, int caller_uid, int caller_gid, int setattr_uid, int setattr_gid);
CDentry *prepare_stray_dentry(MDRequestRef& mdr, CInode *in);
CInode* prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino, unsigned mode,
- file_layout_t *layout=NULL);
+ const file_layout_t *layout=nullptr);
void journal_allocated_inos(MDRequestRef& mdr, EMetaBlob *blob);
void apply_allocated_inos(MDRequestRef& mdr, Session *session);
if (path.length())
path = path.substr(1); // drop leading /
- if (in->inode.is_dir() &&
- in->inode.has_layout() &&
- in->inode.layout.pool_ns.length() &&
+ const auto& inode = in->get_inode();
+ if (in->is_dir() &&
+ inode->has_layout() &&
+ inode->layout.pool_ns.length() &&
!connection->has_feature(CEPH_FEATURE_FS_FILE_LAYOUT_V2)) {
dout(10) << __func__ << " client doesn't support FS_FILE_LAYOUT_V2" << dendl;
return -EIO;
}
- if (!auth_caps.is_capable(path, in->inode.uid, in->inode.gid, in->inode.mode,
+ if (!auth_caps.is_capable(path, inode->uid, inode->gid, inode->mode,
caller_uid, caller_gid, caller_gid_list, mask,
new_uid, new_gid,
info.inst.addr)) {
SnapContext nullsnapc;
PurgeItem item;
- item.ino = in->inode.ino;
+ item.ino = in->ino();
item.stamp = ceph_clock_now();
if (in->is_dir()) {
item.action = PurgeItem::PURGE_DIR;
ceph_assert(in->last == CEPH_NOSNAP);
}
+ const auto& pi = in->get_projected_inode();
+
uint64_t to = 0;
if (in->is_file()) {
- to = in->inode.get_max_size();
- to = std::max(in->inode.size, to);
+ to = std::max(pi->size, pi->get_max_size());
// when truncating a file, the filer does not delete stripe objects that are
// truncated to zero. so we need to purge stripe objects up to the max size
// the file has ever been.
- to = std::max(in->inode.max_size_ever, to);
+ to = std::max(pi->max_size_ever, to);
}
- auto pi = in->get_projected_inode();
-
item.size = to;
item.layout = pi->layout;
item.old_pools.clear();
EUpdate *le = new EUpdate(mds->mdlog, "purge_stray truncate");
mds->mdlog->start_entry(le);
- auto &pi = in->project_inode();
- pi.inode.size = 0;
- pi.inode.max_size_ever = 0;
- pi.inode.client_ranges.clear();
- pi.inode.truncate_size = 0;
- pi.inode.truncate_from = 0;
- pi.inode.version = in->pre_dirty();
+ auto pi = in->project_inode();
+ pi.inode->size = 0;
+ pi.inode->max_size_ever = 0;
+ pi.inode->client_ranges.clear();
+ pi.inode->truncate_size = 0;
+ pi.inode->truncate_from = 0;
+ pi.inode->version = in->pre_dirty();
le->metablob.add_dir_context(dn->dir);
le->metablob.add_primary_dentry(dn, in, true);
pf->fragstat.nsubdirs--;
else
pf->fragstat.nfiles--;
- pf->rstat.sub(in->inode.accounted_rstat);
+ pf->rstat.sub(in->get_inode()->accounted_rstat);
le->metablob.add_dir_context(dn->dir);
EMetaBlob::dirlump& dl = le->metablob.add_dir(dn->dir, true);
}
// purge?
- if (in->inode.nlink == 0) {
+ if (in->get_inode()->nlink == 0) {
// past snaprealm parents imply snapped dentry remote links.
// only important for directories. normal file data snaps are handled
// by the object store.
}
// don't purge multiversion inode with snap data
if (in->snaprealm && in->snaprealm->has_past_parents() &&
- !in->old_inodes.empty()) {
+ in->is_any_old_inodes()) {
// A file with snapshots: we will truncate the HEAD revision
// but leave the metadata intact.
ceph_assert(!in->is_dir());
CDentry::linkage_t *stray_dnl = stray_dn->get_projected_linkage();
ceph_assert(stray_dnl->is_primary());
CInode *stray_in = stray_dnl->get_inode();
- ceph_assert(stray_in->inode.nlink >= 1);
+ ceph_assert(stray_in->get_inode()->nlink >= 1);
ceph_assert(stray_in->last == CEPH_NOSNAP);
/* If no remote_dn hinted, pick one arbitrarily */
dout(10) << " realm " << *realm << dendl;
const SnapContext *snapc = &realm->get_snap_context();
- uint64_t to = in->inode.get_max_size();
- to = std::max(in->inode.size, to);
+ uint64_t to = std::max(in->get_inode()->size, in->get_inode()->get_max_size());
// when truncating a file, the filer does not delete stripe objects that are
// truncated to zero. so we need to purge stripe objects up to the max size
// the file has ever been.
- to = std::max(in->inode.max_size_ever, to);
+ to = std::max(in->get_inode()->max_size_ever, to);
ceph_assert(to > 0);
PurgeItem item;
item.action = PurgeItem::TRUNCATE_FILE;
- item.ino = in->inode.ino;
- item.layout = in->inode.layout;
+ item.ino = in->ino();
+ item.layout = in->get_inode()->layout;
item.snapc = *snapc;
item.size = to;
item.stamp = ceph_clock_now();
std::string dn; // dentry
snapid_t dnfirst, dnlast;
version_t dnv{0};
- CInode::mempool_inode inode; // if it's not XXX should not be part of mempool; wait for std::pmr to simplify
+ CInode::inode_const_ptr inode; // if it's not XXX should not be part of mempool; wait for std::pmr to simplify
+ CInode::xattr_map_const_ptr xattrs;
fragtree_t dirfragtree;
- CInode::mempool_xattr_map xattrs;
std::string symlink;
snapid_t oldest_snap;
bufferlist snapbl;
__u8 state{0};
- CInode::mempool_old_inode_map old_inodes; // XXX should not be part of mempool; wait for std::pmr to simplify
+ CInode::old_inode_map_const_ptr old_inodes; // XXX should not be part of mempool; wait for std::pmr to simplify
fullbit(std::string_view d, snapid_t df, snapid_t dl,
- version_t v, const CInode::mempool_inode& i, const fragtree_t &dft,
- const CInode::mempool_xattr_map &xa, std::string_view sym,
+ version_t v, const CInode::inode_const_ptr& i, const fragtree_t &dft,
+ const CInode::xattr_map_const_ptr& xa, std::string_view sym,
snapid_t os, const bufferlist &sbl, __u8 st,
- const CInode::mempool_old_inode_map *oi = NULL) :
+ const CInode::old_inode_map_const_ptr& oi) :
dn(d), dnfirst(df), dnlast(dl), dnv(v), inode(i), xattrs(xa),
- oldest_snap(os), state(st)
+ oldest_snap(os), state(st), old_inodes(oi)
{
- if (i.is_symlink())
+ if (i->is_symlink())
symlink = sym;
- if (i.is_dir())
+ if (i->is_dir())
dirfragtree = dft;
- if (oi)
- old_inodes = *oi;
snapbl = sbl;
}
explicit fullbit(bufferlist::const_iterator &p) {
void print(ostream& out) const {
out << " fullbit dn " << dn << " [" << dnfirst << "," << dnlast << "] dnv " << dnv
- << " inode " << inode.ino
+ << " inode " << inode->ino
<< " state=" << state << std::endl;
}
string state_string() const {
in->last_journaled = event_seq;
//cout << "journaling " << in->inode.ino << " at " << my_offset << std::endl;
- const auto pi = in->get_projected_inode();
+ const auto& pi = in->get_projected_inode();
if ((state & fullbit::STATE_DIRTY) && pi->is_backtrace_updated())
state |= fullbit::STATE_DIRTYPARENT;
lump.nfull++;
lump.add_dfull(dn->get_name(), dn->first, dn->last, dn->get_projected_version(),
- *pi, in->dirfragtree, *in->get_projected_xattrs(), in->symlink,
- in->oldest_snap, snapbl, state, &in->old_inodes);
+ pi, in->dirfragtree, in->get_projected_xattrs(), in->symlink,
+ in->oldest_snap, snapbl, state, in->get_old_inodes());
}
// convenience: primary or remote? figure it out.
in->last_journaled = event_seq;
//cout << "journaling " << in->inode.ino << " at " << my_offset << std::endl;
- const auto& pi = *(in->get_projected_inode());
+ const auto& pi = in->get_projected_inode();
+ const auto& px = in->get_projected_xattrs();
const auto& pdft = in->dirfragtree;
- const auto& px = *(in->get_projected_xattrs());
bufferlist snapbl;
const sr_t *sr = in->get_projected_srnode();
sr->encode(snapbl);
for (auto p = roots.begin(); p != roots.end(); ++p) {
- if (p->inode.ino == in->ino()) {
+ if (p->inode->ino == in->ino()) {
roots.erase(p);
break;
}
string empty;
roots.emplace_back(empty, in->first, in->last, 0, pi, pdft, px, in->symlink,
in->oldest_snap, snapbl, (dirty ? fullbit::STATE_DIRTY : 0),
- &in->old_inodes);
+ in->get_old_inodes());
}
dirlump& add_dir(CDir *dir, bool dirty, bool complete=false) {
encode(dnfirst, bl);
encode(dnlast, bl);
encode(dnv, bl);
- encode(inode, bl, features);
- encode(xattrs, bl);
- if (inode.is_symlink())
+ encode(*inode, bl, features);
+ if (xattrs)
+ encode(*xattrs, bl);
+ else
+ encode((__u32)0, bl);
+
+ if (inode->is_symlink())
encode(symlink, bl);
- if (inode.is_dir()) {
+ if (inode->is_dir()) {
encode(dirfragtree, bl);
encode(snapbl, bl);
}
encode(state, bl);
- if (old_inodes.empty()) {
+ if (!old_inodes || old_inodes->empty()) {
encode(false, bl);
} else {
encode(true, bl);
- encode(old_inodes, bl, features);
+ encode(*old_inodes, bl, features);
}
- if (!inode.is_dir())
+ if (!inode->is_dir())
encode(snapbl, bl);
encode(oldest_snap, bl);
ENCODE_FINISH(bl);
decode(dnfirst, bl);
decode(dnlast, bl);
decode(dnv, bl);
- decode(inode, bl);
- decode_noshare(xattrs, bl);
- if (inode.is_symlink())
+ {
+ auto _inode = CInode::allocate_inode();
+ decode(*_inode, bl);
+ inode = std::move(_inode);
+ }
+ {
+ CInode::mempool_xattr_map tmp;
+ decode_noshare(tmp, bl);
+ if (!tmp.empty())
+ xattrs = CInode::allocate_xattr_map(std::move(tmp));
+ }
+ if (inode->is_symlink())
decode(symlink, bl);
- if (inode.is_dir()) {
+ if (inode->is_dir()) {
decode(dirfragtree, bl);
decode(snapbl, bl);
}
bool old_inodes_present;
decode(old_inodes_present, bl);
if (old_inodes_present) {
- decode(old_inodes, bl);
+ auto _old_inodes = CInode::allocate_old_inode_map();
+ decode(*_old_inodes, bl);
+ old_inodes = std::move(_old_inodes);
}
- if (!inode.is_dir()) {
+ if (!inode->is_dir()) {
decode(snapbl, bl);
}
decode(oldest_snap, bl);
f->dump_stream("snapid.last") << dnlast;
f->dump_int("dentry version", dnv);
f->open_object_section("inode");
- inode.dump(f);
+ inode->dump(f);
f->close_section(); // inode
f->open_object_section("xattrs");
- for (const auto &p : xattrs) {
- std::string s(p.second.c_str(), p.second.length());
- f->dump_string(p.first.c_str(), s);
+ if (xattrs) {
+ for (const auto &p : *xattrs) {
+ std::string s(p.second.c_str(), p.second.length());
+ f->dump_string(p.first.c_str(), s);
+ }
}
f->close_section(); // xattrs
- if (inode.is_symlink()) {
+ if (inode->is_symlink()) {
f->dump_string("symlink", symlink);
}
- if (inode.is_dir()) {
+ if (inode->is_dir()) {
f->dump_stream("frag tree") << dirfragtree;
f->dump_string("has_snapbl", snapbl.length() ? "true" : "false");
- if (inode.has_layout()) {
+ if (inode->has_layout()) {
f->open_object_section("file layout policy");
// FIXME
f->dump_string("layout", "the layout exists");
}
}
f->dump_string("state", state_string());
- if (!old_inodes.empty()) {
+ if (old_inodes && !old_inodes->empty()) {
f->open_array_section("old inodes");
- for (const auto &p : old_inodes) {
+ for (const auto &p : *old_inodes) {
f->open_object_section("inode");
f->dump_int("snapid", p.first);
p.second.dump(f);
void EMetaBlob::fullbit::generate_test_instances(std::list<EMetaBlob::fullbit*>& ls)
{
- CInode::mempool_inode inode;
+ auto _inode = CInode::allocate_inode();
fragtree_t fragtree;
- CInode::mempool_xattr_map empty_xattrs;
+ auto _xattrs = CInode::allocate_xattr_map();
bufferlist empty_snapbl;
fullbit *sample = new fullbit("/testdn", 0, 0, 0,
- inode, fragtree, empty_xattrs, "", 0, empty_snapbl,
+ _inode, fragtree, _xattrs, "", 0, empty_snapbl,
false, NULL);
ls.push_back(sample);
}
void EMetaBlob::fullbit::update_inode(MDSRank *mds, CInode *in)
{
- in->inode = inode;
- in->xattrs = xattrs;
- if (in->inode.is_dir()) {
+ in->reset_inode(std::move(inode));
+ in->reset_xattrs(std::move(xattrs));
+ if (in->is_dir()) {
if (is_export_ephemeral_random()) {
dout(15) << "random ephemeral pin on " << *in << dendl;
in->set_ephemeral_rand(true);
if (!(in->dirfragtree == dirfragtree)) {
dout(10) << "EMetaBlob::fullbit::update_inode dft " << in->dirfragtree << " -> "
<< dirfragtree << " on " << *in << dendl;
- in->dirfragtree = dirfragtree;
+ in->dirfragtree = std::move(dirfragtree);
in->force_dirfrags();
if (in->get_num_dirfrags() && in->authority() == CDIR_AUTH_UNDEF) {
auto&& ls = in->get_nested_dirfrags();
}
}
}
- } else if (in->inode.is_symlink()) {
+ } else if (in->is_symlink()) {
in->symlink = symlink;
}
- in->old_inodes = old_inodes;
- if (!in->old_inodes.empty()) {
- snapid_t min_first = in->old_inodes.rbegin()->first + 1;
+ in->reset_old_inodes(std::move(old_inodes));
+ if (in->is_any_old_inodes()) {
+ snapid_t min_first = in->get_old_inodes()->rbegin()->first + 1;
if (min_first > in->first)
in->first = min_first;
}
*/
if (in->is_file()) {
// Files must have valid layouts with a pool set
- if (in->inode.layout.pool_id == -1 || !in->inode.layout.is_valid()) {
+ if (in->get_inode()->layout.pool_id == -1 ||
+ !in->get_inode()->layout.is_valid()) {
dout(0) << "EMetaBlob.replay invalid layout on ino " << *in
- << ": " << in->inode.layout << dendl;
+ << ": " << in->get_inode()->layout << dendl;
std::ostringstream oss;
oss << "Invalid layout for inode " << in->ino() << " in journal";
mds->clog->error() << oss.str();
// Record inodes of fullbits
for (const auto& iter : dl.get_dfull()) {
- inodes.insert(iter.inode.ino);
+ inodes.insert(iter.inode->ino);
}
// Record inodes of remotebits
for (const auto& iter : dl.get_dfull()) {
std::string_view dentry = iter.dn;
children[dir_ino].emplace_back(dentry);
- ino_locations[iter.inode.ino] = Location(dir_ino, dentry);
+ ino_locations[iter.inode->ino] = Location(dir_ino, dentry);
}
for (const auto& iter : dl.get_dremote()) {
for (const auto& iter : dl.get_dfull()) {
std::string_view dentry = iter.dn;
- if (children.find(iter.inode.ino) == children.end()) {
+ if (children.find(iter.inode->ino) == children.end()) {
leaf_locations.push_back(Location(dir_ino, dentry));
}
}
ceph_assert(g_conf()->mds_kill_journal_replay_at != 1);
for (auto& p : roots) {
- CInode *in = mds->mdcache->get_inode(p.inode.ino);
+ CInode *in = mds->mdcache->get_inode(p.inode->ino);
bool isnew = in ? false:true;
if (!in)
in = new CInode(mds->mdcache, false, 2, CEPH_NOSNAP);
if (lump.is_importing())
dn->state_set(CDentry::STATE_AUTH);
- CInode *in = mds->mdcache->get_inode(fb.inode.ino, fb.dnlast);
+ CInode *in = mds->mdcache->get_inode(fb.inode->ino, fb.dnlast);
if (!in) {
in = new CInode(mds->mdcache, dn->is_auth(), fb.dnfirst, fb.dnlast);
fb.update_inode(mds, in);
unlinked[dn->get_linkage()->get_inode()] = dir;
stringstream ss;
ss << "EMetaBlob.replay FIXME had dentry linked to wrong inode " << *dn
- << " " << *dn->get_linkage()->get_inode() << " should be " << fb.inode.ino;
+ << " " << *dn->get_linkage()->get_inode() << " should be " << in->ino();
dout(0) << ss.str() << dendl;
mds->clog->warn(ss);
}
unlinked[dn->get_linkage()->get_inode()] = dir;
stringstream ss;
ss << "EMetaBlob.replay FIXME had dentry linked to wrong inode " << *dn
- << " " << *dn->get_linkage()->get_inode() << " should be " << fb.inode.ino;
+ << " " << *dn->get_linkage()->get_inode() << " should be " << in->ino();
dout(0) << ss.str() << dendl;
mds->clog->warn(ss);
}
decode(version, p);
uint32_t inline_len;
decode(inline_len, p);
- if (inline_len > 0)
- ceph::decode_nohead(inline_len, get_data(), p);
- else
+ if (inline_len > 0) {
+ ceph::buffer::list bl;
+ decode_nohead(inline_len, bl, p);
+ set_data(bl);
+ } else
free_data();
}
#include "include/frag.h"
#include "include/xlist.h"
#include "include/interval_set.h"
-#include "include/compact_map.h"
#include "include/compact_set.h"
#include "include/fs_types.h"
inline_data_t() {}
inline_data_t(const inline_data_t& o) : version(o.version) {
if (o.blp)
- get_data() = *o.blp;
+ set_data(*o.blp);
}
inline_data_t& operator=(const inline_data_t& o) {
version = o.version;
if (o.blp)
- get_data() = *o.blp;
+ set_data(*o.blp);
else
free_data();
return *this;
void free_data() {
blp.reset();
}
- ceph::buffer::list& get_data() {
+ void get_data(ceph::buffer::list& ret) const {
+ if (blp)
+ ret = *blp;
+ else
+ ret.clear();
+ }
+ void set_data(const ceph::buffer::list& bl) {
if (!blp)
blp.reset(new ceph::buffer::list);
- return *blp;
+ *blp = bl;
}
size_t length() const { return blp ? blp->length() : 0; }
using alloc_string = std::basic_string<char,std::char_traits<char>,Allocator<char>>;
template<template<typename> class Allocator>
-using xattr_map = compact_map<alloc_string<Allocator>,
- ceph::bufferptr,
- std::less<alloc_string<Allocator>>,
- Allocator<std::pair<const alloc_string<Allocator>,
- ceph::bufferptr>>>; // FIXME bufferptr not in mempool
+using xattr_map = std::map<alloc_string<Allocator>,
+ ceph::bufferptr,
+ std::less<alloc_string<Allocator>>,
+ Allocator<std::pair<const alloc_string<Allocator>,
+ ceph::bufferptr>>>; // FIXME bufferptr not in mempool
template<template<typename> class Allocator>
inline void decode_noshare(xattr_map<Allocator>& xattrs, ceph::buffer::list::const_iterator &p)
}
void add_inode_locks(CInode *in, __u32 nonce, ceph::buffer::list& bl) {
using ceph::encode;
- encode(in->inode.ino, inode_locks);
+ encode(in->ino(), inode_locks);
encode(in->last, inode_locks);
encode(nonce, inode_locks);
encode(bl, inode_locks);
}
void add_inode_base(CInode *in, uint64_t features) {
using ceph::encode;
- encode(in->inode.ino, inode_base);
+ encode(in->ino(), inode_base);
encode(in->last, inode_base);
ceph::buffer::list bl;
in->_encode_base(bl, features);
}
// Compose
- InodeStore inode;
- inode.inode.ino = inono;
- inode.inode.version = 1;
- inode.inode.xattr_version = 1;
- inode.inode.mode = 0500 | mode;
+ InodeStore inode_data;
+ auto inode = inode_data.get_inode();
+ inode->ino = inono;
+ inode->version = 1;
+ inode->xattr_version = 1;
+ inode->mode = 0500 | mode;
// Fake dirstat.nfiles to 1, so that the directory doesn't appear to be empty
// (we won't actually give the *correct* dirstat here though)
- inode.inode.dirstat.nfiles = 1;
+ inode->dirstat.nfiles = 1;
- inode.inode.ctime =
- inode.inode.mtime = ceph_clock_now();
- inode.inode.nlink = 1;
- inode.inode.truncate_size = -1ull;
- inode.inode.truncate_seq = 1;
- inode.inode.uid = g_conf()->mds_root_ino_uid;
- inode.inode.gid = g_conf()->mds_root_ino_gid;
+ inode->ctime = inode->mtime = ceph_clock_now();
+ inode->nlink = 1;
+ inode->truncate_size = -1ull;
+ inode->truncate_seq = 1;
+ inode->uid = g_conf()->mds_root_ino_uid;
+ inode->gid = g_conf()->mds_root_ino_gid;
// Force layout to default: should we let users override this so that
// they don't have to mount the filesystem to correct it?
- inode.inode.layout = file_layout_t::get_default();
- inode.inode.layout.pool_id = data_pool_id;
- inode.inode.dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash;
+ inode->layout = file_layout_t::get_default();
+ inode->layout.pool_id = data_pool_id;
+ inode->dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash;
// Assume that we will get our stats wrong, and that we may
// be ignoring dirfrags that exist
- inode.damage_flags |= (DAMAGE_STATS | DAMAGE_RSTATS | DAMAGE_FRAGTREE);
+ inode_data.damage_flags |= (DAMAGE_STATS | DAMAGE_RSTATS | DAMAGE_FRAGTREE);
if (inono == MDS_INO_ROOT || MDS_INO_IS_MDSDIR(inono)) {
sr_t srnode;
srnode.seq = 1;
- encode(srnode, inode.snap_blob);
+ encode(srnode, inode_data.snap_blob);
}
// Serialize
bufferlist inode_bl;
encode(std::string(CEPH_FS_ONDISK_MAGIC), inode_bl);
- inode.encode(inode_bl, CEPH_FEATURES_SUPPORTED_DEFAULT);
+ inode_data.encode(inode_bl, CEPH_FEATURES_SUPPORTED_DEFAULT);
// Write
r = metadata_io.write_full(oid.name, inode_bl);
bool is_dir;
map<snapid_t, SnapInfo> snaps;
link_info_t() : version(0), nlink(0), is_dir(false) {}
- link_info_t(inodeno_t di, frag_t df, const string& n, const CInode::mempool_inode& i) :
+ link_info_t(inodeno_t di, frag_t df, const string& n, const CInode::inode_const_ptr& i) :
dirino(di), frag(df), name(n),
- version(i.version), nlink(i.nlink), is_dir(S_IFDIR & i.mode) {}
+ version(i->version), nlink(i->nlink), is_dir(S_IFDIR & i->mode) {}
dirfrag_t dirfrag() const {
return dirfrag_t(dirino, frag);
}
if (dentry_type == 'I') {
InodeStore inode;
inode.decode_bare(q);
- inodeno_t ino = inode.inode.ino;
+ inodeno_t ino = inode.inode->ino;
if (step == SCAN_INOS) {
if (used_inos.contains(ino, 1)) {
snaprealm_v2_since = last + 1;
}
}
- if (!inode.old_inodes.empty()) {
- if (inode.old_inodes.rbegin()->first > last_snap)
- last_snap = inode.old_inodes.rbegin()->first;
+ if (inode.old_inodes && !inode.old_inodes->empty()) {
+ auto _last_snap = inode.old_inodes->rbegin()->first;
+ if (_last_snap > last_snap)
+ last_snap = _last_snap;
}
auto q = dup_primaries.find(ino);
if (q != dup_primaries.end()) {
nlink = r->second;
if (!MDS_INO_IS_STRAY(dir_ino))
nlink++;
- if (inode.inode.nlink != nlink) {
+ if (inode.inode->nlink != nlink) {
derr << "Bad nlink on " << ino << " expected " << nlink
- << " has " << inode.inode.nlink << dendl;
+ << " has " << inode.inode->nlink << dendl;
bad_nlink_inos[ino] = link_info_t(dir_ino, frag_id, dname, inode.inode);
bad_nlink_inos[ino].nlink = nlink;
}
return r;
}
- if (inode.inode.ino != p.first || inode.inode.version != p.second.version)
+ if (inode.inode->ino != p.first || inode.inode->version != p.second.version)
continue;
- inode.inode.nlink = p.second.nlink;
+ inode.get_inode()->nlink = p.second.nlink;
r = metadata_driver->inject_linkage(p.second.dirino, p.second.name, p.second.frag, inode, first);
if (r < 0)
return r;
return r;
}
} else {
- if (!(lf_ino.inode.mode & S_IFDIR)) {
+ if (!(lf_ino.inode->mode & S_IFDIR)) {
derr << "lost+found exists but is not a directory!" << dendl;
// In this case we error out, and the user should do something about
// this problem.
return r;
}
- InodeStore recovered_ino;
-
-
const std::string dname = lost_found_dname(ino);
// Write dentry into lost+found dirfrag
- return inject_linkage(lf_ino.inode.ino, dname, frag_t(), dentry);
+ return inject_linkage(lf_ino.inode->ino, dname, frag_t(), dentry);
}
r = read_dentry(parent_ino, frag_t(), parent_dname, &existing_dentry);
if (r >= 0) {
// Great, fast path: return the fragtree from here
- if (existing_dentry.inode.ino != dirino) {
+ if (existing_dentry.inode->ino != dirino) {
dout(4) << "Unexpected inode in dentry! 0x" << std::hex
- << existing_dentry.inode.ino
+ << existing_dentry.inode->ino
<< " vs expected 0x" << dirino << std::dec << dendl;
return -ENOENT;
}
break;
} else {
// Dentry already present, does it link to me?
- if (existing_dentry.inode.ino == ino) {
+ if (existing_dentry.inode->ino == ino) {
dout(20) << "Dentry 0x" << std::hex
<< parent_ino << std::dec << "/"
<< dname << " already exists and points to me" << dendl;
derr << "Dentry 0x" << std::hex
<< parent_ino << std::dec << "/"
<< dname << " already exists but points to 0x"
- << std::hex << existing_dentry.inode.ino << std::dec << dendl;
+ << std::hex << existing_dentry.inode->ino << std::dec << dendl;
// Fall back to lost+found!
return inject_lost_and_found(backtrace.ino, dentry);
}
// This is the linkage for the file of interest
dout(10) << "Linking inode 0x" << std::hex << ino
<< " at 0x" << parent_ino << "/" << dname << std::dec
- << " with size=" << dentry.inode.size << " bytes" << dendl;
+ << " with size=" << dentry.inode->size << " bytes" << dendl;
r = inject_linkage(parent_ino, dname, fragment, dentry);
} else {
// This is the linkage for an ancestor directory
InodeStore ancestor_dentry;
- ancestor_dentry.inode.mode = 0755 | S_IFDIR;
+ auto inode = ancestor_dentry.get_inode();
+ inode->mode = 0755 | S_IFDIR;
// Set nfiles to something non-zero, to fool any other code
// that tries to ignore 'empty' directories. This won't be
// accurate, but it should avoid functional issues.
- ancestor_dentry.inode.dirstat.nfiles = 1;
- ancestor_dentry.inode.dir_layout.dl_dir_hash =
- g_conf()->mds_default_dir_hash;
+ inode->dirstat.nfiles = 1;
+ inode->dir_layout.dl_dir_hash =
+ g_conf()->mds_default_dir_hash;
- ancestor_dentry.inode.nlink = 1;
- ancestor_dentry.inode.ino = ino;
- ancestor_dentry.inode.uid = g_conf()->mds_root_ino_uid;
- ancestor_dentry.inode.gid = g_conf()->mds_root_ino_gid;
- ancestor_dentry.inode.version = 1;
- ancestor_dentry.inode.backtrace_version = 1;
+ inode->nlink = 1;
+ inode->ino = ino;
+ inode->uid = g_conf()->mds_root_ino_uid;
+ inode->gid = g_conf()->mds_root_ino_gid;
+ inode->version = 1;
+ inode->backtrace_version = 1;
r = inject_linkage(parent_ino, dname, fragment, ancestor_dentry);
}
} else {
dout(20) << "Injected dentry 0x" << std::hex
<< dir_ino << "/" << dname << " pointing to 0x"
- << inode.inode.ino << std::dec << dendl;
+ << inode.inode->ino << std::dec << dendl;
return 0;
}
}
if (is_file) {
// FIXME: inject_data won't cope with interesting (i.e. striped)
// layouts (need a librados-compatible Filer to read these)
- inject_data(path_builder, dentry.inode.size,
- dentry.inode.layout.object_size, bt.ino);
+ inject_data(path_builder, dentry.inode->size,
+ dentry.inode->layout.object_size, bt.ino);
} else {
int r = mkdir(path_builder.c_str(), 0755);
if (r != 0 && r != -EPERM) {
}
std::string file_path = lf_path + "/" + lost_found_dname(ino);
- return inject_data(file_path, dentry.inode.size,
- dentry.inode.layout.object_size, ino);
+ return inject_data(file_path, dentry.inode->size,
+ dentry.inode->layout.object_size, ino);
}
int LocalFileDriver::init_roots(int64_t data_pool_id)
{
ceph_assert(out != NULL);
- out->inode.mode = 0500 | S_IFREG;
- out->inode.size = file_size;
- out->inode.max_size_ever = file_size;
- out->inode.mtime.tv.tv_sec = file_mtime;
- out->inode.atime.tv.tv_sec = file_mtime;
- out->inode.ctime.tv.tv_sec = file_mtime;
+ auto inode = out->get_inode();
+ inode->mode = 0500 | S_IFREG;
+ inode->size = file_size;
+ inode->max_size_ever = file_size;
+ inode->mtime.tv.tv_sec = file_mtime;
+ inode->atime.tv.tv_sec = file_mtime;
+ inode->ctime.tv.tv_sec = file_mtime;
- out->inode.layout = layout;
+ inode->layout = layout;
- out->inode.truncate_seq = 1;
- out->inode.truncate_size = -1ull;
+ inode->truncate_seq = 1;
+ inode->truncate_size = -1ull;
- out->inode.inline_data.version = CEPH_INLINE_NONE;
+ inode->inline_data.version = CEPH_INLINE_NONE;
- out->inode.nlink = 1;
- out->inode.ino = ino;
- out->inode.version = 1;
- out->inode.backtrace_version = 1;
- out->inode.uid = g_conf()->mds_root_ino_uid;
- out->inode.gid = g_conf()->mds_root_ino_gid;
+ inode->nlink = 1;
+ inode->ino = ino;
+ inode->version = 1;
+ inode->backtrace_version = 1;
+ inode->uid = g_conf()->mds_root_ino_uid;
+ inode->gid = g_conf()->mds_root_ino_gid;
}
void MetadataTool::build_dir_dentry(
{
ceph_assert(out != NULL);
- out->inode.mode = 0755 | S_IFDIR;
- out->inode.dirstat = fragstat;
- out->inode.mtime.tv.tv_sec = fragstat.mtime;
- out->inode.atime.tv.tv_sec = fragstat.mtime;
- out->inode.ctime.tv.tv_sec = fragstat.mtime;
+ auto inode = out->get_inode();
+ inode->mode = 0755 | S_IFDIR;
+ inode->dirstat = fragstat;
+ inode->mtime.tv.tv_sec = fragstat.mtime;
+ inode->atime.tv.tv_sec = fragstat.mtime;
+ inode->ctime.tv.tv_sec = fragstat.mtime;
- out->inode.layout = layout;
- out->inode.dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash;
+ inode->layout = layout;
+ inode->dir_layout.dl_dir_hash = g_conf()->mds_default_dir_hash;
- out->inode.truncate_seq = 1;
- out->inode.truncate_size = -1ull;
+ inode->truncate_seq = 1;
+ inode->truncate_size = -1ull;
- out->inode.inline_data.version = CEPH_INLINE_NONE;
+ inode->inline_data.version = CEPH_INLINE_NONE;
- out->inode.nlink = 1;
- out->inode.ino = ino;
- out->inode.version = 1;
- out->inode.backtrace_version = 1;
- out->inode.uid = g_conf()->mds_root_ino_uid;
- out->inode.gid = g_conf()->mds_root_ino_gid;
+ inode->nlink = 1;
+ inode->ino = ino;
+ inode->version = 1;
+ inode->backtrace_version = 1;
+ inode->uid = g_conf()->mds_root_ino_uid;
+ inode->gid = g_conf()->mds_root_ino_gid;
}
InodeStore inode;
inode.decode_bare(q);
dout(4) << "decoded embedded inode version "
- << inode.inode.version << " vs fullbit version "
- << fb.inode.version << dendl;
- if (inode.inode.version < fb.inode.version) {
+ << inode.inode->version << " vs fullbit version "
+ << fb.inode->version << dendl;
+ if (inode.inode->version < fb.inode->version) {
write_dentry = true;
}
} else {
// Record for writing to RADOS
write_vals[key] = dentry_bl;
- consumed_inos->insert(fb.inode.ino);
+ consumed_inos->insert(fb.inode->ino);
}
}
* of directories
*/
for (const auto& fb : metablob.roots) {
- inodeno_t ino = fb.inode.ino;
+ inodeno_t ino = fb.inode->ino;
dout(4) << "updating root 0x" << std::hex << ino << std::dec << dendl;
object_t root_oid = InodeStore::get_object_name(ino, frag_t(), ".inode");
dout(4) << "magic ok" << dendl;
old_inode.decode(inode_bl_iter);
- if (old_inode.inode.version < fb.inode.version) {
+ if (old_inode.inode->version < fb.inode->version) {
write_root_ino = true;
}
} else {
if (write_root_ino && !dry_run) {
dout(4) << "writing root ino " << root_oid.name
- << " version " << fb.inode.version << dendl;
+ << " version " << fb.inode->version << dendl;
// Compose: root ino format is magic,InodeStore(bare=false)
bufferlist new_root_ino_bl;
f->open_object_section("fnodes");
for (const auto &frag : frags) {
bufferlist hbl;
- string oid = obj_name(inode_meta.get_meta()->inode.ino, frag);
+ string oid = obj_name(inode_meta.get_meta()->inode->ino, frag);
int ret = io_meta.omap_get_header(oid, &hbl);
if (ret < 0) {
std::cerr << __func__ << " : can't find oid("<< oid << ")" << std::endl;
f->close_section();
f->flush(ds);
- if (sp_ino > 0 && op != NULL && sp_ino == inode_data.inode.ino) {
+ if (sp_ino > 0 && op != NULL && sp_ino == inode_data.inode->ino) {
inode_meta_t* tmp = new inode_meta_t(first, type, &inode_data);
- op->inodes[inode_data.inode.ino] = tmp;
- op->okeys[inode_data.inode.ino] = key.data();
+ op->inodes[inode_data.inode->ino] = tmp;
+ op->okeys[inode_data.inode->ino] = key.data();
return 1;
} else {
delete &inode_data;