``filestore split multiple``
-:Description: ``filestore_split_multiple * abs(filestore_merge_threshold) * 16``
+:Description: ``(filestore_split_multiple * abs(filestore_merge_threshold) + (rand() % filestore_split_rand_factor)) * 16``
is the maximum number of files in a subdirectory before
splitting into child directories.
:Default: ``2``
+``filestore split rand factor``
+
+:Description: A random factor added to the split threshold to avoid
+ too many filestore splits occurring at once. See
+ ``filestore split multiple`` for details.
+ For an existing OSD, this can only be changed offline,
+ via ceph-objectstore-tool's apply-layout-settings command.
+
+:Type: Unsigned 32-bit Integer
+:Required: No
+:Default: ``20``
+
+
``filestore update to``
:Description: Limits filestore auto upgrade to specified version.
if (preload_erasure_code() < 0)
return -1;
+ srand(time(NULL) + getpid());
+
osd = new OSD(g_ceph_context,
store,
whoami,
OPTION(filestore_fiemap_threshold, OPT_INT, 4096)
OPTION(filestore_merge_threshold, OPT_INT, 10)
OPTION(filestore_split_multiple, OPT_INT, 2)
+OPTION(filestore_split_rand_factor, OPT_U32, 20) // randomize the split threshold by adding 16 * [0, rand_factor)
OPTION(filestore_update_to, OPT_INT, 1000)
OPTION(filestore_blackhole, OPT_BOOL, false) // drop any new transactions on the floor
OPTION(filestore_fd_cache_size, OPT_INT, 128) // FD lru size
virtual int apply_layout_settings() { assert(0); return 0; }
+ /// Read index-wide settings (should be called after construction)
+ virtual int read_settings() { return 0; }
+
/// Virtual destructor
virtual ~CollectionIndex() {}
};
#define dout_subsys ceph_subsys_filestore
const string HashIndex::SUBDIR_ATTR = "contents";
+const string HashIndex::SETTINGS_ATTR = "settings";
const string HashIndex::IN_PROGRESS_OP_TAG = "in_progress_op";
/// hex digit to integer value
int HashIndex::apply_layout_settings() {
vector<string> path;
dout(10) << __func__ << " split multiple = " << split_multiplier
- << " merge threshold = " << merge_threshold << dendl;
+ << " merge threshold = " << merge_threshold
+ << " split rand factor = " << g_conf->filestore_split_rand_factor
+ << dendl;
+ int r = write_settings(); // draws a new random split factor and persists it before re-splitting
+ if (r < 0)
+ return r; // propagate the failure from storing the settings attr; no split is attempted
return split_dirs(path);
}
int HashIndex::_init() {
subdir_info_s info;
vector<string> path;
- return set_info(path, info);
+ int r = set_info(path, info); // store the default-constructed subdir info at the empty path
+ if (r < 0)
+ return r;
+ return write_settings(); // then pick and persist this index's random split factor
+}
+
+int HashIndex::write_settings() { // picks a new random split factor and stores the encoded settings under SETTINGS_ATTR
+ if (g_conf->filestore_split_rand_factor > 0) {
+ settings.split_rand_factor = rand() % g_conf->filestore_split_rand_factor; // value in [0, configured factor)
+ } else {
+ settings.split_rand_factor = 0; // a configured factor of 0 means no randomization; also avoids modulo by zero
+ }
+ vector<string> path; // empty path — presumably addresses the collection root; confirm against add_attr_path
+ bufferlist bl;
+ settings.encode(bl);
+ return add_attr_path(path, SETTINGS_ATTR, bl); // returns add_attr_path's result unchanged
+}
+
+int HashIndex::read_settings() { // loads settings stored under SETTINGS_ATTR; returns 0 on success or the negative error from get_attr_path
+ vector<string> path; // empty path, the same location write_settings() stores to
+ bufferlist bl;
+ int r = get_attr_path(path, SETTINGS_ATTR, bl);
+ if (r == -ENODATA) // no settings attr present: keep the default-constructed settings (split_rand_factor = 0)
+ return 0;
+ if (r < 0) {
+ derr << __func__ << " error reading settings: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+ bufferlist::iterator it = bl.begin();
+ settings.decode(it); // NOTE(review): a decode failure is not handled here — confirm how ::decode reports malformed data
+ dout(20) << __func__ << " split_rand_factor = " << settings.split_rand_factor << dendl;
+ return 0;
}
/* LFNIndex virtual method implementations */
// Calculate the number of leaf folders (which actually store files)
// need to be created
- const uint64_t objs_per_folder = (uint64_t)(abs(merge_threshold)) * (uint64_t)split_multiplier * 16;
+ const uint64_t objs_per_folder = ((uint64_t)(abs(merge_threshold)) * (uint64_t)split_multiplier + settings.split_rand_factor) * 16;
uint64_t leavies = expected_num_objs / objs_per_folder ;
// No need to split
if (leavies == 0 || expected_num_objs == objs_per_folder)
bool HashIndex::must_split(const subdir_info_s &info) {
return (info.hash_level < (unsigned)MAX_HASH_LEVEL &&
- info.objs > ((unsigned)(abs(merge_threshold)) * 16 * split_multiplier));
+ info.objs > ((unsigned)(abs(merge_threshold) * split_multiplier + settings.split_rand_factor) * 16)); // threshold = 16 * (|merge_threshold| * split_multiplier + split_rand_factor)
}
* ex: ghobject_t("object", CEPH_NO_SNAP, 0xA4CEE0D2)
* would be located in (root)/2/D/0/
*
- * Subdirectories are created when the number of objects in a directory
- * exceed (abs(merge_threshhold)) * 16 * split_multiplier. The number of objects in a directory
- * is encoded as subdir_info_s in an xattr on the directory.
+ * Subdirectories are created when the number of objects in a
+ * directory exceeds 16 * (abs(merge_threshold) * split_multiplier +
+ * split_rand_factor). The number of objects in a directory is encoded
+ * as subdir_info_s in an xattr on the directory.
*/
class HashIndex : public LFNIndex {
private:
/// Attribute name for storing subdir info @see subdir_info_s
static const string SUBDIR_ATTR;
+ /// Attribute name for storing index-wide settings
+ static const string SETTINGS_ATTR;
/// Attribute name for storing in progress op tag
static const string IN_PROGRESS_OP_TAG;
/// Size (bits) in object hash
/**
* Merges occur when the number of object drops below
* merge_threshold and splits occur when the number of objects
- * exceeds 16 * abs(merge_threshold) * split_multiplier.
- * Please note if merge_threshold is less than zero, it will never do merging
+ * exceeds:
+ *
+ * 16 * (abs(merge_threshold) * split_multiplier + split_rand_factor)
+ *
+ * Please note if merge_threshold is less than zero, it will never
+ * do merging
*/
int merge_threshold;
int split_multiplier;
}
};
+ struct settings_s { // index-wide settings, encoded into the SETTINGS_ATTR attribute
+ uint32_t split_rand_factor; ///< random factor added to split threshold (only on root of collection)
+ settings_s() : split_rand_factor(0) {} // default: no random offset
+ void encode(bufferlist &bl) const
+ {
+ __u8 v = 1; // encoding version byte, written first
+ ::encode(v, bl);
+ ::encode(split_rand_factor, bl);
+ }
+ void decode(bufferlist::iterator &bl)
+ {
+ __u8 v; // version byte is read but not checked
+ ::decode(v, bl);
+ ::decode(split_rand_factor, bl);
+ }
+ } settings;
+
/// Encodes in progress split or merge
struct InProgressOp {
static const int SPLIT = 0;
double retry_probability=0) ///< [in] retry probability
: LFNIndex(collection, base_path, index_version, retry_probability),
merge_threshold(merge_at),
- split_multiplier(split_multiple) {}
+ split_multiplier(split_multiple)
+ {}
+
+ int read_settings() override;
/// @see CollectionIndex
uint32_t collection_version() { return index_version; }
/// split each dir below the given path
int split_dirs(const vector<string> &path);
+
+ int write_settings();
};
#endif
g_conf->filestore_split_multiple,
version,
g_conf->filestore_index_retry_probability);
- return index.init();
+ r = index.init();
+ if (r < 0)
+ return r;
+ return index.read_settings();
}
int IndexManager::build_index(coll_t c, const char *path, CollectionIndex **index) {
// Must be a HashIndex
*index = new HashIndex(c, path, g_conf->filestore_merge_threshold,
g_conf->filestore_split_multiple, version);
- return 0;
+ return (*index)->read_settings();
}
default: assert(0);
}
g_conf->filestore_split_multiple,
CollectionIndex::HOBJECT_WITH_POOL,
g_conf->filestore_index_retry_probability);
- return 0;
+ return (*index)->read_settings();
}
}