*/
-
#ifndef CEPH_CDIR_H
#define CEPH_CDIR_H
struct ObjectOperation;
ostream& operator<<(ostream& out, const class CDir& dir);
-class CDir : public MDSCacheObject, public Counter<CDir> {
- using time = ceph::coarse_mono_time;
- using clock = ceph::coarse_mono_clock;
-
- friend ostream& operator<<(ostream& out, const class CDir& dir);
+class CDir : public MDSCacheObject, public Counter<CDir> {
public:
MEMPOOL_CLASS_HELPERS();
+
+ typedef mempool::mds_co::map<dentry_key_t, CDentry*> dentry_key_map;
+ typedef mempool::mds_co::set<dentry_key_t> dentry_key_set;
+
+ // -- freezing --
+ struct freeze_tree_state_t {
+ CDir *dir; // freezing/frozen tree root
+ int auth_pins = 0;
+ bool frozen = false;
+ freeze_tree_state_t(CDir *d) : dir(d) {}
+ };
+
+ class scrub_info_t {
+ public:
+ struct scrub_stamps {
+ version_t version;
+ utime_t time;
+ scrub_stamps() : version(0) {}
+ void operator=(const scrub_stamps &o) {
+ version = o.version;
+ time = o.time;
+ }
+ };
+
+ scrub_info_t() :
+ directory_scrubbing(false),
+ need_scrub_local(false),
+ last_scrub_dirty(false),
+ pending_scrub_error(false) {}
+
+ /// inodes we contain with dirty scrub stamps
+ dentry_key_map dirty_scrub_stamps; // TODO: make use of this!
+
+ scrub_stamps recursive_start; // when we last started a recursive scrub
+ scrub_stamps last_recursive; // when we last finished a recursive scrub
+ scrub_stamps last_local; // when we last did a local scrub
+
+ bool directory_scrubbing; /// safety check
+ bool need_scrub_local;
+ bool last_scrub_dirty; /// is scrub info dirty or is it flushed to fnode?
+ bool pending_scrub_error;
+
+ /// these are lists of children in each stage of scrubbing
+ dentry_key_set directories_to_scrub;
+ dentry_key_set directories_scrubbing;
+ dentry_key_set directories_scrubbed;
+ dentry_key_set others_to_scrub;
+ dentry_key_set others_scrubbing;
+ dentry_key_set others_scrubbed;
+
+ ScrubHeaderRefConst header;
+ };
+
// -- pins --
static const int PIN_DNWAITER = 1;
static const int PIN_INOWAITER = 2;
static const int PIN_EXPORTBOUND = 10;
static const int PIN_STICKY = 11;
static const int PIN_SUBTREETEMP = 12; // used by MDCache::trim_non_auth()
- std::string_view pin_name(int p) const override {
- switch (p) {
- case PIN_DNWAITER: return "dnwaiter";
- case PIN_INOWAITER: return "inowaiter";
- case PIN_CHILD: return "child";
- case PIN_FROZEN: return "frozen";
- case PIN_SUBTREE: return "subtree";
- case PIN_IMPORTING: return "importing";
- case PIN_IMPORTBOUND: return "importbound";
- case PIN_EXPORTBOUND: return "exportbound";
- case PIN_STICKY: return "sticky";
- case PIN_SUBTREETEMP: return "subtreetemp";
- default: return generic_pin_name(p);
- }
- }
// -- state --
- static const unsigned STATE_COMPLETE = (1<< 0); // the complete contents are in cache
- static const unsigned STATE_FROZENTREE = (1<< 1); // root of tree (bounded by exports)
- static const unsigned STATE_FREEZINGTREE = (1<< 2); // in process of freezing
+ static const unsigned STATE_COMPLETE = (1<< 0); // the complete contents are in cache
+ static const unsigned STATE_FROZENTREE = (1<< 1); // root of tree (bounded by exports)
+ static const unsigned STATE_FREEZINGTREE = (1<< 2); // in process of freezing
static const unsigned STATE_FROZENDIR = (1<< 3);
static const unsigned STATE_FREEZINGDIR = (1<< 4);
- static const unsigned STATE_COMMITTING = (1<< 5); // mid-commit
- static const unsigned STATE_FETCHING = (1<< 6); // currenting fetching
+ static const unsigned STATE_COMMITTING = (1<< 5); // mid-commit
+ static const unsigned STATE_FETCHING = (1<< 6); // currenting fetching
static const unsigned STATE_CREATING = (1<< 7);
static const unsigned STATE_IMPORTBOUND = (1<< 8);
static const unsigned STATE_EXPORTBOUND = (1<< 9);
static const int REP_ALL = 1;
static const int REP_LIST = 2;
-
static const unsigned EXPORT_NONCE = 1;
-
// -- wait masks --
static const uint64_t WAIT_DENTRY = (1<<0); // wait for item to be in cache
static const uint64_t WAIT_COMPLETE = (1<<1); // wait for complete dir contents
static const int DUMP_MDS_CACHE_OBJECT = (1 << 7);
static const int DUMP_ITEMS = (1 << 8);
static const int DUMP_ALL = (-1);
- static const int DUMP_DEFAULT = DUMP_ALL & (~DUMP_ITEMS);
+ static const int DUMP_DEFAULT = DUMP_ALL & (~DUMP_ITEMS);
- public:
- // context
- MDCache *cache;
+ CDir(CInode *in, frag_t fg, MDCache *mdcache, bool auth);
- CInode *inode; // my inode
- frag_t frag; // my frag
+ std::string_view pin_name(int p) const override {
+ switch (p) {
+ case PIN_DNWAITER: return "dnwaiter";
+ case PIN_INOWAITER: return "inowaiter";
+ case PIN_CHILD: return "child";
+ case PIN_FROZEN: return "frozen";
+ case PIN_SUBTREE: return "subtree";
+ case PIN_IMPORTING: return "importing";
+ case PIN_IMPORTBOUND: return "importbound";
+ case PIN_EXPORTBOUND: return "exportbound";
+ case PIN_STICKY: return "sticky";
+ case PIN_SUBTREETEMP: return "subtreetemp";
+ default: return generic_pin_name(p);
+ }
+ }
bool is_lt(const MDSCacheObject *r) const override {
return dirfrag() < (static_cast<const CDir*>(r))->dirfrag();
}
- fnode_t fnode;
- snapid_t first;
- mempool::mds_co::compact_map<snapid_t,old_rstat_t> dirty_old_rstat; // [value.first,key]
-
- // my inodes with dirty rstat data
- elist<CInode*> dirty_rstat_inodes;
-
void resync_accounted_fragstat();
void resync_accounted_rstat();
void assimilate_dirty_rstat_inodes();
inode->num_exporting_dirs--;
}
-protected:
- version_t projected_version;
- mempool::mds_co::list<fnode_t> projected_fnode;
-
-public:
- elist<CDentry*> dirty_dentries;
- elist<CDir*>::item item_dirty, item_new;
-
-public:
version_t get_version() const { return fnode.version; }
void set_version(version_t v) {
ceph_assert(projected_fnode.empty());
void mark_new(LogSegment *ls);
bool is_bad() { return state_test(STATE_BADFRAG); }
-private:
- void log_mark_dirty();
-
-public:
- typedef mempool::mds_co::map<dentry_key_t, CDentry*> dentry_key_map;
- typedef mempool::mds_co::set<dentry_key_t> dentry_key_set;
-
- class scrub_info_t {
- public:
- /// inodes we contain with dirty scrub stamps
- dentry_key_map dirty_scrub_stamps; // TODO: make use of this!
- struct scrub_stamps {
- version_t version;
- utime_t time;
- scrub_stamps() : version(0) {}
- void operator=(const scrub_stamps &o) {
- version = o.version;
- time = o.time;
- }
- };
-
- scrub_stamps recursive_start; // when we last started a recursive scrub
- scrub_stamps last_recursive; // when we last finished a recursive scrub
- scrub_stamps last_local; // when we last did a local scrub
-
- bool directory_scrubbing; /// safety check
- bool need_scrub_local;
- bool last_scrub_dirty; /// is scrub info dirty or is it flushed to fnode?
- bool pending_scrub_error;
- /// these are lists of children in each stage of scrubbing
- dentry_key_set directories_to_scrub;
- dentry_key_set directories_scrubbing;
- dentry_key_set directories_scrubbed;
- dentry_key_set others_to_scrub;
- dentry_key_set others_scrubbing;
- dentry_key_set others_scrubbed;
-
- ScrubHeaderRefConst header;
-
- scrub_info_t() :
- directory_scrubbing(false),
- need_scrub_local(false),
- last_scrub_dirty(false),
- pending_scrub_error(false) {}
- };
/**
* Call to start this CDir on a new scrub.
* @pre It is not currently scrubbing
* @returns true if the rstats and directory contents match, false otherwise.
*/
bool scrub_local();
-private:
- /**
- * Create a scrub_info_t struct for the scrub_infop pointer.
- */
- void scrub_info_create() const;
- /**
- * Delete the scrub_infop if it's not got any useful data.
- */
- void scrub_maybe_delete_info();
- /**
- * Check the given set (presumably one of those in scrub_info_t) for the
- * next key to scrub and look it up (or fail!).
- */
- int _next_dentry_on_set(dentry_key_set &dns, bool missing_okay,
- MDSContext *cb, CDentry **dnout);
-
-
-protected:
- std::unique_ptr<scrub_info_t> scrub_infop; // FIXME not in mempool
-
- // contents of this directory
- dentry_key_map items; // non-null AND null
- unsigned num_head_items;
- unsigned num_head_null;
- unsigned num_snap_items;
- unsigned num_snap_null;
-
- int num_dirty;
-
- int num_inodes_with_caps = 0;
-
- // state
- version_t committing_version;
- version_t committed_version;
-
- mempool::mds_co::compact_set<mempool::mds_co::string> stale_items;
-
- // lock nesting, freeze
- static int num_frozen_trees;
- static int num_freezing_trees;
-
- int dir_auth_pins;
-
- // cache control (defined for authority; hints for replicas)
- __s32 dir_rep;
- mempool::mds_co::compact_set<__s32> dir_rep_by; // if dir_rep == REP_LIST
-
- // popularity
- dirfrag_load_vec_t pop_me;
- dirfrag_load_vec_t pop_nested;
- dirfrag_load_vec_t pop_auth_subtree;
- dirfrag_load_vec_t pop_auth_subtree_nested;
-
- time last_popularity_sample = clock::zero();
-
- load_spread_t pop_spread;
-
- elist<CInode*> pop_lru_subdirs;
-
- // and to provide density
- int num_dentries_nested;
- int num_dentries_auth_subtree;
- int num_dentries_auth_subtree_nested;
-
-
- // friends
- friend class Migrator;
- friend class CInode;
- friend class MDCache;
- friend class MDiscover;
- friend class MDBalancer;
-
- friend class CDirDiscover;
- friend class CDirExport;
- friend class C_IO_Dir_TMAP_Fetched;
- friend class C_IO_Dir_OMAP_Fetched;
- friend class C_IO_Dir_OMAP_FetchedMore;
- friend class C_IO_Dir_Committed;
-
- std::unique_ptr<bloom_filter> bloom; // XXX not part of mempool::mds_co
- /* If you set up the bloom filter, you must keep it accurate!
- * It's deleted when you mark_complete() and is deliberately not serialized.*/
-
- public:
- CDir(CInode *in, frag_t fg, MDCache *mdcache, bool auth);
const scrub_info_t *scrub_info() const {
if (!scrub_infop) {
return scrub_infop.get();
}
-
// -- accessors --
inodeno_t ino() const { return inode->ino(); } // deprecate me?
frag_t get_frag() const { return frag; }
}
// -- dentries and inodes --
- public:
CDentry* lookup_exact_snap(std::string_view dname, snapid_t last);
CDentry* lookup(std::string_view n, snapid_t snap=CEPH_NOSNAP);
void remove_bloom() {
bloom.reset();
}
-private:
- void link_inode_work( CDentry *dn, CInode *in );
- void unlink_inode_work( CDentry *dn );
- void remove_null_dentries();
- void purge_stale_snap_data(const std::set<snapid_t>& snaps);
-public:
+
void try_remove_dentries_for_stray();
bool try_trim_snap_dentry(CDentry *dn, const std::set<snapid_t>& snaps);
-
-public:
void split(int bits, std::vector<CDir*>* subs, MDSContext::vec& waiters, bool replay);
void merge(const std::vector<CDir*>& subs, MDSContext::vec& waiters, bool replay);
return (int)get_frag_size() < g_conf()->mds_bal_merge_size;
}
-private:
- void prepare_new_fragment(bool replay);
- void prepare_old_fragment(map<string_snap_t, MDSContext::vec >& dentry_waiters, bool replay);
- void steal_dentry(CDentry *dn); // from another dir. used by merge/split.
- void finish_old_fragment(MDSContext::vec& waiters, bool replay);
- void init_fragment_pins();
-
-
- // -- authority --
- /*
- * normal: <parent,unknown> !subtree_root
- * delegation: <mds,unknown> subtree_root
- * ambiguous: <mds1,mds2> subtree_root
- * <parent,mds2> subtree_root
- */
- mds_authority_t dir_auth;
-
- std::string get_path() const;
-
- public:
mds_authority_t authority() const override;
mds_authority_t get_dir_auth() const { return dir_auth; }
void set_dir_auth(const mds_authority_t &a);
bool contains(CDir *x); // true if we are x or an ancestor of x
-
// for giving to clients
void get_dist_spec(std::set<mds_rank_t>& ls, mds_rank_t auth) {
if (is_rep()) {
_decode_base(p);
}
-
-
// -- state --
bool is_complete() { return state & STATE_COMPLETE; }
bool is_exporting() { return state & STATE_EXPORTING; }
void fetch(MDSContext *c, bool ignore_authpinnability=false);
void fetch(MDSContext *c, std::string_view want_dn, bool ignore_authpinnability=false);
void fetch(MDSContext *c, const std::set<dentry_key_t>& keys);
-protected:
- mempool::mds_co::compact_set<mempool::mds_co::string> wanted_items;
- void _omap_fetch(MDSContext *fin, const std::set<dentry_key_t>& keys);
- void _omap_fetch_more(
- bufferlist& hdrbl, std::map<std::string, bufferlist>& omap,
- MDSContext *fin);
- CDentry *_load_dentry(
- std::string_view key,
- std::string_view dname,
- snapid_t last,
- bufferlist &bl,
- int pos,
- const std::set<snapid_t> *snaps,
- bool *force_dirty);
-
- /**
- * Mark this fragment as BADFRAG (common part of go_bad and go_bad_dentry)
- */
- void _go_bad();
-
- /**
- * Go bad due to a damaged dentry (register with damagetable and go BADFRAG)
- */
- void go_bad_dentry(snapid_t last, std::string_view dname);
-
- /**
- * Go bad due to a damaged header (register with damagetable and go BADFRAG)
- */
- void go_bad(bool complete);
-
- void _omap_fetched(bufferlist& hdrbl, std::map<std::string, bufferlist>& omap,
- bool complete, int r);
-
- // -- commit --
- mempool::mds_co::compact_map<version_t, MDSContext::vec_alloc<mempool::mds_co::pool_allocator> > waiting_for_commit;
- void _commit(version_t want, int op_prio);
- void _omap_commit(int op_prio);
- void _encode_dentry(CDentry *dn, bufferlist& bl, const std::set<snapid_t> *snaps);
- void _committed(int r, version_t v);
-public:
#if 0 // unused?
void wait_for_commit(Context *c, version_t v=0);
#endif
void mark_complete();
-
// -- reference counting --
void first_get() override;
void last_put() override;
- // -- waiters --
-protected:
- mempool::mds_co::compact_map< string_snap_t, MDSContext::vec_alloc<mempool::mds_co::pool_allocator> > waiting_on_dentry; // FIXME string_snap_t not in mempool
-
-public:
bool is_waiting_for_dentry(std::string_view dname, snapid_t snap) {
return waiting_on_dentry.count(string_snap_t(dname, snap));
}
void add_waiter(uint64_t mask, MDSContext *c) override;
void take_waiting(uint64_t mask, MDSContext::vec& ls) override; // may include dentry waiters
void finish_waiting(uint64_t mask, int result = 0); // ditto
-
// -- import/export --
void encode_export(bufferlist& bl);
void adjust_nested_auth_pins(int dirinc, void *by);
void verify_fragstat();
- // -- freezing --
- struct freeze_tree_state_t {
- CDir *dir; // freezing/frozen tree root
- int auth_pins = 0;
- bool frozen = false;
- freeze_tree_state_t(CDir *d) : dir(d) {}
- };
- // all dirfrags within freezing/frozen tree reference the 'state'
- std::shared_ptr<freeze_tree_state_t> freeze_tree_state;
-
void _walk_tree(std::function<bool(CDir*)> cb);
bool freeze_tree();
void print(ostream& out) override;
void dump(Formatter *f, int flags = DUMP_DEFAULT) const;
void dump_load(Formatter *f);
+
+ // context
+ MDCache *cache;
+
+ CInode *inode; // my inode
+ frag_t frag; // my frag
+
+ fnode_t fnode;
+ snapid_t first;
+ mempool::mds_co::compact_map<snapid_t,old_rstat_t> dirty_old_rstat; // [value.first,key]
+
+ // my inodes with dirty rstat data
+ elist<CInode*> dirty_rstat_inodes;
+
+ elist<CDentry*> dirty_dentries;
+ elist<CDir*>::item item_dirty, item_new;
+
+ // all dirfrags within freezing/frozen tree reference the 'state'
+ std::shared_ptr<freeze_tree_state_t> freeze_tree_state;
+
+protected:
+ // friends
+ friend class Migrator;
+ friend class CInode;
+ friend class MDCache;
+ friend class MDiscover;
+ friend class MDBalancer;
+
+ friend class CDirDiscover;
+ friend class CDirExport;
+ friend class C_IO_Dir_TMAP_Fetched;
+ friend class C_IO_Dir_OMAP_Fetched;
+ friend class C_IO_Dir_OMAP_FetchedMore;
+ friend class C_IO_Dir_Committed;
+
+ void _omap_fetch(MDSContext *fin, const std::set<dentry_key_t>& keys);
+ void _omap_fetch_more(
+ bufferlist& hdrbl, std::map<std::string, bufferlist>& omap,
+ MDSContext *fin);
+ CDentry *_load_dentry(
+ std::string_view key,
+ std::string_view dname,
+ snapid_t last,
+ bufferlist &bl,
+ int pos,
+ const std::set<snapid_t> *snaps,
+ bool *force_dirty);
+
+ /**
+ * Mark this fragment as BADFRAG (common part of go_bad and go_bad_dentry)
+ */
+ void _go_bad();
+
+ /**
+ * Go bad due to a damaged dentry (register with damagetable and go BADFRAG)
+ */
+ void go_bad_dentry(snapid_t last, std::string_view dname);
+
+ /**
+ * Go bad due to a damaged header (register with damagetable and go BADFRAG)
+ */
+ void go_bad(bool complete);
+
+ void _omap_fetched(bufferlist& hdrbl, std::map<std::string, bufferlist>& omap,
+ bool complete, int r);
+
+ // -- commit --
+ void _commit(version_t want, int op_prio);
+ void _omap_commit(int op_prio);
+ void _encode_dentry(CDentry *dn, bufferlist& bl, const std::set<snapid_t> *snaps);
+ void _committed(int r, version_t v);
+
+ version_t projected_version;
+ mempool::mds_co::list<fnode_t> projected_fnode;
+
+ std::unique_ptr<scrub_info_t> scrub_infop; // FIXME not in mempool
+
+ // contents of this directory
+ dentry_key_map items; // non-null AND null
+ unsigned num_head_items;
+ unsigned num_head_null;
+ unsigned num_snap_items;
+ unsigned num_snap_null;
+
+ int num_dirty;
+
+ int num_inodes_with_caps = 0;
+
+ // state
+ version_t committing_version;
+ version_t committed_version;
+
+ mempool::mds_co::compact_set<mempool::mds_co::string> stale_items;
+
+ // lock nesting, freeze
+ static int num_frozen_trees;
+ static int num_freezing_trees;
+
+ int dir_auth_pins;
+
+ // cache control (defined for authority; hints for replicas)
+ __s32 dir_rep;
+ mempool::mds_co::compact_set<__s32> dir_rep_by; // if dir_rep == REP_LIST
+
+ // popularity
+ dirfrag_load_vec_t pop_me;
+ dirfrag_load_vec_t pop_nested;
+ dirfrag_load_vec_t pop_auth_subtree;
+ dirfrag_load_vec_t pop_auth_subtree_nested;
+
+ ceph::coarse_mono_time last_popularity_sample = ceph::coarse_mono_clock::zero();
+
+ load_spread_t pop_spread;
+
+ elist<CInode*> pop_lru_subdirs;
+
+ // and to provide density
+ int num_dentries_nested;
+ int num_dentries_auth_subtree;
+ int num_dentries_auth_subtree_nested;
+
+ std::unique_ptr<bloom_filter> bloom; // XXX not part of mempool::mds_co
+ /* If you set up the bloom filter, you must keep it accurate!
+ * It's deleted when you mark_complete() and is deliberately not serialized.*/
+
+ mempool::mds_co::compact_set<mempool::mds_co::string> wanted_items;
+ mempool::mds_co::compact_map<version_t, MDSContext::vec_alloc<mempool::mds_co::pool_allocator> > waiting_for_commit;
+
+ // -- waiters --
+ mempool::mds_co::compact_map< string_snap_t, MDSContext::vec_alloc<mempool::mds_co::pool_allocator> > waiting_on_dentry; // FIXME string_snap_t not in mempool
+
+private:
+ friend ostream& operator<<(ostream& out, const class CDir& dir);
+
+ void log_mark_dirty();
+
+ /**
+ * Create a scrub_info_t struct for the scrub_infop pointer.
+ */
+ void scrub_info_create() const;
+ /**
+ * Delete the scrub_infop if it's not got any useful data.
+ */
+ void scrub_maybe_delete_info();
+ /**
+ * Check the given set (presumably one of those in scrub_info_t) for the
+ * next key to scrub and look it up (or fail!).
+ */
+ int _next_dentry_on_set(dentry_key_set &dns, bool missing_okay,
+ MDSContext *cb, CDentry **dnout);
+
+ void link_inode_work( CDentry *dn, CInode *in );
+ void unlink_inode_work( CDentry *dn );
+ void remove_null_dentries();
+ void purge_stale_snap_data(const std::set<snapid_t>& snaps);
+
+ void prepare_new_fragment(bool replay);
+ void prepare_old_fragment(map<string_snap_t, MDSContext::vec >& dentry_waiters, bool replay);
+ void steal_dentry(CDentry *dn); // from another dir. used by merge/split.
+ void finish_old_fragment(MDSContext::vec& waiters, bool replay);
+ void init_fragment_pins();
+ std::string get_path() const;
+
+ // -- authority --
+ /*
+ * normal: <parent,unknown> !subtree_root
+ * delegation: <mds,unknown> subtree_root
+ * ambiguous: <mds1,mds2> subtree_root
+ * <parent,mds2> subtree_root
+ */
+ mds_authority_t dir_auth;
};
#endif