num_strays = mdc_stats['num_strays']
num_strays_purging = pq_stats['pq_executing']
num_purge_ops = pq_stats['pq_executing_ops']
+ files_high_water = pq_stats['pq_executing_high_water']
+ ops_high_water = pq_stats['pq_executing_ops_high_water']
- self.data_log.append([datetime.datetime.now(), num_strays, num_strays_purging, num_purge_ops])
-
- files_high_water = max(files_high_water, num_strays_purging)
- ops_high_water = max(ops_high_water, num_purge_ops)
+ self.data_log.append([datetime.datetime.now(), num_strays, num_strays_purging, num_purge_ops, files_high_water, ops_high_water])
total_strays_created = mdc_stats['strays_created']
total_strays_purged = pq_stats['pq_executed']
raise RuntimeError("Ops in flight high water is unexpectedly low ({0} / {1})".format(
ops_high_water, mds_max_purge_ops
))
+ # The MDS may exceed mds_max_purge_ops for some items, such as a
+ # heavily fragmented directory. The throttle does not kick in
+ # until *after* we reach or exceed the limit. This is expected
+ # because we don't want to starve the purge queue (PQ) or never
+ # purge a particularly large file/directory.
+ self.assertLessEqual(ops_high_water, mds_max_purge_ops+64)
elif throttle_type == self.FILES_THROTTLE:
if files_high_water < mds_max_purge_files / 2:
raise RuntimeError("Files in flight high water is unexpectedly low ({0} / {1})".format(
files_high_water, mds_max_purge_files
))
+ self.assertLessEqual(files_high_water, mds_max_purge_files)
# Sanity check all MDC stray stats
stats = self.fs.mds_asok(['perf', 'dump'])
pcb.set_prio_default(PerfCountersBuilder::PRIO_USEFUL);
pcb.add_u64(l_pq_executing_ops, "pq_executing_ops", "Purge queue ops in flight");
+ pcb.add_u64(l_pq_executing_ops_high_water, "pq_executing_ops_high_water", "Maximum number of executing file purge ops");
pcb.add_u64(l_pq_executing, "pq_executing", "Purge queue tasks in flight");
+ pcb.add_u64(l_pq_executing_high_water, "pq_executing_high_water", "Maximum number of executing file purges");
pcb.add_u64(l_pq_item_in_journal, "pq_item_in_journal", "Purge item left in journal");
logger.reset(pcb.create_perf_counters());
in_flight[expire_to] = item;
logger->set(l_pq_executing, in_flight.size());
+ files_high_water = std::max(files_high_water, in_flight.size());
+ logger->set(l_pq_executing_high_water, files_high_water);
auto ops = _calculate_ops(item);
ops_in_flight += ops;
logger->set(l_pq_executing_ops, ops_in_flight);
+ ops_high_water = std::max(ops_high_water, ops_in_flight);
+ logger->set(l_pq_executing_ops_high_water, ops_high_water);
SnapContext nullsnapc;
"dropping it" << dendl;
ops_in_flight -= ops;
logger->set(l_pq_executing_ops, ops_in_flight);
+ ops_high_water = std::max(ops_high_water, ops_in_flight);
+ logger->set(l_pq_executing_ops_high_water, ops_high_water);
in_flight.erase(expire_to);
logger->set(l_pq_executing, in_flight.size());
+ files_high_water = std::max(files_high_water, in_flight.size());
+ logger->set(l_pq_executing_high_water, files_high_water);
return;
}
ceph_assert(gather.has_subs());
ops_in_flight -= _calculate_ops(iter->second);
logger->set(l_pq_executing_ops, ops_in_flight);
+ ops_high_water = std::max(ops_high_water, ops_in_flight);
+ logger->set(l_pq_executing_ops_high_water, ops_high_water);
dout(10) << "completed item for ino " << iter->second.ino << dendl;
in_flight.erase(iter);
logger->set(l_pq_executing, in_flight.size());
+ files_high_water = std::max(files_high_water, in_flight.size());
+ logger->set(l_pq_executing_high_water, files_high_water);
dout(10) << "in_flight.size() now " << in_flight.size() << dendl;
uint64_t write_pos = journaler.get_write_pos();