OPTION(osd_tier_default_cache_hit_set_count, OPT_INT, 4)
OPTION(osd_tier_default_cache_hit_set_period, OPT_INT, 1200)
OPTION(osd_tier_default_cache_hit_set_type, OPT_STR, "bloom")
+OPTION(osd_tier_default_cache_min_read_recency_for_promote, OPT_INT, 1) // number of recent HitSets the object must appear in to be promoted (on read)
OPTION(osd_map_dedup, OPT_BOOL, true)
OPTION(osd_map_max_advance, OPT_INT, 200) // make this < cache_size!
"rename <srcpool> to <destpool>", "osd", "rw", "cli,rest")
COMMAND("osd pool get " \
"name=pool,type=CephPoolname " \
- "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|auid|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile", \
+ "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|auid|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote", \
"get pool parameter <var>", "osd", "r", "cli,rest")
COMMAND("osd pool set " \
"name=pool,type=CephPoolname " \
- "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hashpspool|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|debug_fake_ec_pool|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid " \
+ "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hashpspool|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|debug_fake_ec_pool|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid|min_read_recency_for_promote " \
"name=val,type=CephString " \
"name=force,type=CephChoices,strings=--yes-i-really-mean-it,req=false", \
"set pool parameter <var> to <val>", "osd", "rw", "cli,rest")
f->dump_unsigned("cache_min_evict_age", p->cache_min_evict_age);
} else if (var == "erasure_code_profile") {
f->dump_string("erasure_code_profile", p->erasure_code_profile);
+ } else if (var == "min_read_recency_for_promote") {
+ f->dump_int("min_read_recency_for_promote", p->min_read_recency_for_promote);
}
f->close_section();
ss << "cache_min_evict_age: " << p->cache_min_evict_age;
} else if (var == "erasure_code_profile") {
ss << "erasure_code_profile: " << p->erasure_code_profile;
+ } else if (var == "min_read_recency_for_promote") {
+ ss << "min_read_recency_for_promote: " << p->min_read_recency_for_promote;
}
rdata.append(ss);
return -EINVAL;
}
p.cache_min_evict_age = n;
+ } else if (var == "min_read_recency_for_promote") {
+ if (interr.length()) {
+ ss << "error parsing integer value '" << val << "': " << interr;
+ return -EINVAL;
+ }
+ p.min_read_recency_for_promote = n;
} else {
ss << "unrecognized variable '" << var << "'";
return -EINVAL;
ntp->cache_mode = mode;
ntp->hit_set_count = g_conf->osd_tier_default_cache_hit_set_count;
ntp->hit_set_period = g_conf->osd_tier_default_cache_hit_set_period;
+ ntp->min_read_recency_for_promote = g_conf->osd_tier_default_cache_min_read_recency_for_promote;
ntp->hit_set_params = hsp;
ntp->target_max_bytes = size;
ss << "pool '" << tierpoolstr << "' is now (or already was) a cache tier of '" << poolstr << "'";
}
}
+ bool in_hit_set = false;
if (hit_set) {
+ if (missing_oid != hobject_t() && hit_set->contains(missing_oid))
+ in_hit_set = true;
hit_set->insert(oid);
if (hit_set->is_full() ||
hit_set_start_stamp + pool.info.hit_set_period <= m->get_recv_stamp()) {
}
if ((m->get_flags() & CEPH_OSD_FLAG_IGNORE_CACHE) == 0 &&
- maybe_handle_cache(op, write_ordered, obc, r, missing_oid, false))
+ maybe_handle_cache(op, write_ordered, obc, r, missing_oid, false, in_hit_set))
return;
if (r) {
bool write_ordered,
ObjectContextRef obc,
int r, const hobject_t& missing_oid,
- bool must_promote)
+ bool must_promote,
+ bool in_hit_set)
{
if (obc)
dout(25) << __func__ << " " << obc->obs.oi << " "
if (!must_promote && can_skip_promote(op, obc)) {
return false;
}
- promote_object(op, obc, missing_oid);
+ if (op->may_write() || must_promote || !hit_set) {
+ promote_object(op, obc, missing_oid);
+ } else {
+ switch (pool.info.min_read_recency_for_promote) {
+ case 0:
+ promote_object(op, obc, missing_oid);
+ break;
+ case 1:
+ // Check if in the current hit set
+ if (in_hit_set) {
+ promote_object(op, obc, missing_oid);
+ } else {
+ do_cache_redirect(op, obc);
+ }
+ break;
+ default:
+ if (in_hit_set) {
+ promote_object(op, obc, missing_oid);
+ } else {
+ // Check if in other hit sets
+ map<time_t,HitSetRef>::iterator itor;
+ bool in_other_hit_sets = false;
+ for (itor = agent_state->hit_set_map.begin(); itor != agent_state->hit_set_map.end(); itor++) {
+ if (itor->second->contains(missing_oid)) {
+ in_other_hit_sets = true;
+ break;
+ }
+ }
+ if (in_other_hit_sets) {
+ promote_object(op, obc, missing_oid);
+ } else {
+ do_cache_redirect(op, obc);
+ }
+ }
+ break;
+ }
+ }
return true;
case pg_pool_t::CACHEMODE_FORWARD:
info.hit_set.current_info.end = now;
dout(20) << __func__ << " archive " << oid << dendl;
- if (agent_state)
+ if (agent_state) {
agent_state->add_hit_set(info.hit_set.current_info.begin, hit_set);
+ hit_set_in_memory_trim();
+ }
// hold a ref until it is flushed to disk
hit_set_flushing[info.hit_set.current_info.begin] = hit_set;
repop->ctx->op_t->remove(oid);
repop->ctx->log.back().mod_desc.mark_unrollbackable();
}
- if (agent_state)
- agent_state->remove_oldest_hit_set();
updated_hit_set_hist.history.pop_front();
ObjectContextRef obc = get_object_context(oid, false);
}
}
+/**
+ * Bound the number of HitSets kept in agent_state->hit_set_map.
+ *
+ * The limit is min_read_recency_for_promote - 1 (0 when that setting is 0),
+ * capped at the pool's hit_set_count.  Entries are dropped oldest-first via
+ * remove_oldest_hit_set() until the map is within the limit.
+ *
+ * agent_state is dereferenced unconditionally here.
+ * NOTE(review): the "- 1" presumably accounts for the current HitSet, which
+ * is tracked outside hit_set_map -- confirm against the promote path.
+ */
+void ReplicatedPG::hit_set_in_memory_trim()
+{
+  const unsigned recency = pool.info.min_read_recency_for_promote;
+  const unsigned retained = pool.info.hit_set_count;
+
+  // Number of in-memory HitSets allowed to remain.
+  unsigned keep = 0;
+  if (recency > 0)
+    keep = recency - 1;
+  if (keep > retained)
+    keep = retained;
+
+  while (agent_state->hit_set_map.size() > keep)
+    agent_state->remove_oldest_hit_set();
+}
+
// =======================================
// cache agent
else
agent_state->position = next;
+ // Discard old in memory HitSets
+ hit_set_in_memory_trim();
+
if (need_delay) {
assert(agent_state->delaying == false);
agent_delay();
void ReplicatedPG::agent_load_hit_sets()
{
if (agent_state->evict_mode == TierAgentState::EVICT_MODE_IDLE) {
- agent_state->discard_hit_sets();
return;
}
void hit_set_persist(); ///< persist hit info
bool hit_set_apply_log(); ///< apply log entries to update in-memory HitSet
void hit_set_trim(RepGather *repop, unsigned max); ///< discard old HitSets
+ void hit_set_in_memory_trim(); ///< discard old in memory HitSets
hobject_t get_hit_set_current_object(utime_t stamp);
hobject_t get_hit_set_archive_object(utime_t start, utime_t end);
bool write_ordered,
ObjectContextRef obc, int r,
const hobject_t& missing_oid,
- bool must_promote);
+ bool must_promote,
+ bool in_hit_set = false);
/**
* This helper function tells the client to redirect their request elsewhere.
*/
f->close_section(); // hit_set_params
f->dump_unsigned("hit_set_period", hit_set_period);
f->dump_unsigned("hit_set_count", hit_set_count);
+ f->dump_unsigned("min_read_recency_for_promote", min_read_recency_for_promote);
f->dump_unsigned("stripe_width", get_stripe_width());
}
return;
}
- ENCODE_START(15, 5, bl);
+ ENCODE_START(16, 5, bl);
::encode(type, bl);
::encode(size, bl);
::encode(crush_ruleset, bl);
::encode(cache_min_evict_age, bl);
::encode(erasure_code_profile, bl);
::encode(last_force_op_resend, bl);
+ ::encode(min_read_recency_for_promote, bl);
ENCODE_FINISH(bl);
}
void pg_pool_t::decode(bufferlist::iterator& bl)
{
- DECODE_START_LEGACY_COMPAT_LEN(15, 5, 5, bl);
+ DECODE_START_LEGACY_COMPAT_LEN(16, 5, 5, bl);
::decode(type, bl);
::decode(size, bl);
::decode(crush_ruleset, bl);
} else {
last_force_op_resend = 0;
}
+ if (struct_v >= 16) {
+ ::decode(min_read_recency_for_promote, bl);
+ } else {
+ pg_pool_t def;
+ min_read_recency_for_promote = def.min_read_recency_for_promote;
+ }
DECODE_FINISH(bl);
calc_pg_masks();
}
a.hit_set_params = HitSet::Params(new BloomHitSet::Params);
a.hit_set_period = 3600;
a.hit_set_count = 8;
+ a.min_read_recency_for_promote = 1;
a.set_stripe_width(12345);
a.target_max_bytes = 1238132132;
a.target_max_objects = 1232132;
<< " " << p.hit_set_period << "s"
<< " x" << p.hit_set_count;
}
+ if (p.min_read_recency_for_promote)
+ out << " min_read_recency_for_promote " << p.min_read_recency_for_promote;
out << " stripe_width " << p.get_stripe_width();
return out;
}
HitSet::Params hit_set_params; ///< The HitSet params to use on this pool
uint32_t hit_set_period; ///< periodicity of HitSet segments (seconds)
uint32_t hit_set_count; ///< number of periods to retain
+ uint32_t min_read_recency_for_promote; ///< read-promotion recency threshold: 0 = always promote, 1 = object must be in the current HitSet, >1 = in-memory archived HitSets are consulted as well
uint32_t stripe_width; ///< erasure coded stripe size in bytes
hit_set_params(),
hit_set_period(0),
hit_set_count(0),
+ min_read_recency_for_promote(0),
stripe_width(0)
{ }