OPTION(osd_hit_set_max_size, OPT_INT, 100000) // max target size for a HitSet
OPTION(osd_hit_set_namespace, OPT_STR, ".ceph-internal") // rados namespace for hit_set tracking
+OPTION(osd_tier_promote_max_objects_sec, OPT_U64, 0) // target promote rate in objects/sec used by the promote throttle; 0 = this target is not applied
+OPTION(osd_tier_promote_max_bytes_sec, OPT_U64, 0) // target promote rate in bytes/sec used by the promote throttle; 0 = this target is not applied
+
OPTION(osd_tier_default_cache_mode, OPT_STR, "writeback")
OPTION(osd_tier_default_cache_hit_set_count, OPT_INT, 4)
OPTION(osd_tier_default_cache_hit_set_period, OPT_INT, 1200)
agent_stop_flag(false),
agent_timer_lock("OSD::agent_timer_lock"),
agent_timer(osd->client_messenger->cct, agent_timer_lock),
+ promote_probability_millis(1000),
objecter(new Objecter(osd->client_messenger->cct, osd->objecter_messenger, osd->monc, NULL, 0, 0)),
objecter_finisher(osd->client_messenger->cct),
watch_lock("OSD::watch_lock"),
// -------------------------------------
+/**
+ * Saturating double -> unsigned conversion for promote probabilities.
+ *
+ * Converting a double whose truncated value does not fit in the target
+ * integer type is undefined behaviour, so values that are NaN or <= 0
+ * become 0 and values at or above the unsigned maximum become that
+ * maximum.  In-range values are truncated exactly as a plain cast would.
+ */
+static unsigned promote_prob_saturate(double v)
+{
+  const unsigned umax = ~0u;
+  if (!(v > 0))  // zero, negative, or NaN
+    return 0;
+  if (v >= (double)umax)
+    return umax;
+  return (unsigned)v;
+}
+
+/**
+ * Recalibrate the promote throttle.
+ *
+ * Samples promote_counter (attempts, promoted objects, promoted bytes),
+ * measures the time since the previous call, and derives the per-mille
+ * probability with which promote attempts should be let through so that
+ * the osd_tier_promote_max_objects_sec / osd_tier_promote_max_bytes_sec
+ * targets are met.  A target of 0 is not applied; when both are 0 the
+ * uncorrected estimate is 1000 (no throttling).  The estimate is then
+ * corrected by the observed promoted/attempted ratio, clamped to
+ * [1, 1000], and averaged with the current promote_probability_millis
+ * before being stored.
+ */
+void OSDService::promote_throttle_recalibrate()
+{
+  utime_t now = ceph_clock_now(NULL);
+  double dur = now - last_recalibrate;
+  last_recalibrate = now;
+  unsigned prob = promote_probability_millis;
+
+  uint64_t target_obj_sec = g_conf->osd_tier_promote_max_objects_sec;
+  uint64_t target_bytes_sec = g_conf->osd_tier_promote_max_bytes_sec;
+
+  unsigned min_prob = 1;
+
+  uint64_t attempts, obj, bytes;
+  promote_counter.sample_and_attenuate(&attempts, &obj, &bytes);
+  dout(10) << __func__ << " " << attempts << " attempts, promoted "
+	   << obj << " objects and " << pretty_si_t(bytes) << " bytes; target "
+	   << target_obj_sec << " obj/sec or "
+	   << pretty_si_t(target_bytes_sec) << " bytes/sec"
+	   << dendl;
+
+  // calculate what the probability *should* be, given the targets
+  unsigned new_prob;
+  if (attempts && dur > 0) {
+    uint64_t avg_size = 1;
+    if (obj)
+      avg_size = MAX(bytes / obj, 1);
+    // These quotients are unbounded (large dur, few attempts), so
+    // saturate instead of casting straight to unsigned.
+    unsigned po = promote_prob_saturate(
+      (double)target_obj_sec * dur * 1000.0 / (double)attempts);
+    unsigned pb = promote_prob_saturate(
+      (double)target_bytes_sec / (double)avg_size * dur * 1000.0
+      / (double)attempts);
+    // debug detail only; must not go to the error log on every call
+    dout(20) << __func__ << " po " << po << " pb " << pb << " avg_size "
+	     << avg_size << dendl;
+    if (target_obj_sec && target_bytes_sec)
+      new_prob = MIN(po, pb);
+    else if (target_obj_sec)
+      new_prob = po;
+    else if (target_bytes_sec)
+      new_prob = pb;
+    else
+      new_prob = 1000;
+  } else {
+    new_prob = 1000;
+  }
+  dout(20) << __func__ << " new_prob " << new_prob << dendl;
+
+  // correct for persistent skew between target rate and actual rate, adjust
+  double ratio = 1.0;
+  unsigned actual = 0;
+  if (attempts && obj) {
+    actual = obj * 1000 / attempts;
+    ratio = (double)actual / (double)prob;
+    if (ratio > 0) {
+      new_prob = promote_prob_saturate((double)new_prob / ratio);
+    } else if (new_prob) {
+      // actual truncated to 0 per mille: the observed rate is far below
+      // prob, so the correction is unbounded upward.  Avoid dividing by
+      // zero and go straight to the ceiling.
+      new_prob = 1000;
+    }
+  }
+  new_prob = MAX(new_prob, min_prob);
+  new_prob = MIN(new_prob, 1000);
+
+  // adjust: move halfway from the current value toward the new estimate
+  prob = (prob + new_prob) / 2;
+  prob = MAX(prob, min_prob);
+  prob = MIN(prob, 1000);
+  dout(10) << __func__ << "  actual " << actual
+	   << ", actual/prob ratio " << ratio
+	   << ", adjusted new_prob " << new_prob
+	   << ", prob " << promote_probability_millis << " -> " << prob
+	   << dendl;
+  promote_probability_millis = prob;
+}
+
+// -------------------------------------
+
float OSDService::get_full_ratio()
{
float full_ratio = cct->_conf->osd_failsafe_full_ratio;
recovery_tp.wake();
check_replay_queue();
+
+ service.promote_throttle_recalibrate();
}
// only do waiters if dispatch() isn't currently running. (if it is,
flush_mode_high_count --;
}
+ /// throttle promotion attempts
+ unsigned promote_probability_millis; ///< promote probability in thousandths (1000 = never throttle); kept to a single word because promote_throttle() reads it without a lock
+ PromoteCounter promote_counter; ///< attempt / finished-promote counters, sampled by promote_throttle_recalibrate()
+ utime_t last_recalibrate; ///< time of the previous promote_throttle_recalibrate() call
+
+ /**
+  * Count a promote attempt and decide whether it should be throttled.
+  *
+  * @return true if the promote should be skipped (throttled), false if
+  *         it may proceed.
+  */
+ bool promote_throttle() {
+   // NOTE: lockless!  we rely on the probability being a single word.
+   promote_counter.attempt();
+   // rand() % 1000 is in [0, 999]; comparing with >= lets exactly
+   // promote_probability_millis out of 1000 values through (with > the
+   // chance would be (prob + 1) / 1000).
+   if ((unsigned)rand() % 1000 >= promote_probability_millis)
+     return true;  // yes throttle (no promote)
+   return false;   //  no throttle (promote)
+ }
+ /// Forward the byte count of a finished promote to promote_counter.finish().
+ void promote_finish(uint64_t bytes) {
+ promote_counter.finish(bytes);
+ }
+ /// Recompute promote_probability_millis from promote_counter and the configured promote-rate targets.
+ void promote_throttle_recalibrate();
// -- Objecter, for tiering reads/writes from/to other OSDs --
Objecter *objecter;
switch (recency) {
case 0:
- promote_object(obc, missing_oid, oloc, promote_op, promote_obc);
break;
case 1:
// Check if in the current hit set
if (in_hit_set) {
- promote_object(obc, missing_oid, oloc, promote_op, promote_obc);
+ break;
} else {
// not promoting
return false;
}
}
if (count >= recency) {
- promote_object(obc, missing_oid, oloc, promote_op, promote_obc);
- } else {
- // not promoting
- return false;
+ break;
}
+ return false; // not promoting
}
break;
}
+ if (osd->promote_throttle()) {
+ dout(10) << __func__ << " promote throttled" << dendl;
+ return false;
+ }
+ promote_object(obc, missing_oid, oloc, promote_op, promote_obc);
return true;
}
return;
}
+ osd->promote_finish(results->object_size);
+
OpContextUPtr tctx = simple_opc_create(obc);
tctx->at_version = get_next_version();