From: J. Eric Ivancich Date: Wed, 27 Sep 2017 17:37:51 +0000 (-0400) Subject: Merge commit 'b6374d40bcaa4b73d440bb6d8ed42838dec57566' into wip-bring-in-latest... X-Git-Tag: v13.0.1~697^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=7ba15ebab8f89e78ca59e5f0fc64eec93928dfb3;p=ceph.git Merge commit 'b6374d40bcaa4b73d440bb6d8ed42838dec57566' into wip-bring-in-latest-dmclock Signed-off-by: J. Eric Ivancich --- 7ba15ebab8f89e78ca59e5f0fc64eec93928dfb3 diff --cc src/dmclock/sim/src/config.cc index a6702897cd6f,000000000000..a55ba9a47bcc mode 100644,000000..100644 --- a/src/dmclock/sim/src/config.cc +++ b/src/dmclock/sim/src/config.cc @@@ -1,171 -1,0 +1,173 @@@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + + +#include +#include +#include + +#include +#include +#include + +#include "config.h" +#include "str_list.h" + + +static void dashes_to_underscores(const char *input, char *output) { + char c = 0; + char *o = output; + const char *i = input; + // first two characters are copied as-is + *o = *i++; + if (*o++ == '\0') + return; + *o = *i++; + if (*o++ == '\0') + return; + for (; ((c = *i)); ++i) { + if (c == '=') { + strcpy(o, i); + return; + } + if (c == '-') + *o++ = '_'; + else + *o++ = c; + } + *o++ = '\0'; +} + +static int va_ceph_argparse_witharg(std::vector &args, + std::vector::iterator &i, std::string *ret, + std::ostream &oss, va_list ap) { + const char *first = *i; + char tmp[strlen(first)+1]; + dashes_to_underscores(first, tmp); + first = tmp; + + // does this argument match any of the possibilities? + while (1) { + const char *a = va_arg(ap, char*); + if (a == NULL) + return 0; + int strlen_a = strlen(a); + char a2[strlen_a+1]; + dashes_to_underscores(a, a2); + if (strncmp(a2, first, strlen(a2)) == 0) { + if (first[strlen_a] == '=') { + *ret = first + strlen_a + 1; + i = args.erase(i); + return 1; + } + else if (first[strlen_a] == '\0') { + // find second part (or not) + if (i+1 == args.end()) { + oss << "Option " << *i << " requires an argument." << std::endl; + i = args.erase(i); + return -EINVAL; + } + i = args.erase(i); + *ret = *i; + i = args.erase(i); + return 1; + } + } + } +} + +bool crimson::qos_simulation::ceph_argparse_witharg(std::vector &args, + std::vector::iterator &i, std::string *ret, ...) 
{ + int r; + va_list ap; + va_start(ap, ret); + r = va_ceph_argparse_witharg(args, i, ret, std::cerr, ap); + va_end(ap); + if (r < 0) + _exit(1); + return r != 0; +} + +void crimson::qos_simulation::ceph_argparse_early_args(std::vector& args, std::string *conf_file_list) { + std::string val; + + std::vector orig_args = args; + + for (std::vector::iterator i = args.begin(); i != args.end(); ) { + if (ceph_argparse_witharg(args, i, &val, "--conf", "-c", (char*)NULL)) { + *conf_file_list = val; + } + else { + // ignore + ++i; + } + } + return; +} + +static bool stobool(const std::string & v) { + return !v.empty () && + (strcasecmp (v.c_str (), "true") == 0 || + atoi (v.c_str ()) != 0); +} + +int crimson::qos_simulation::parse_config_file(const std::string &fname, sim_config_t &g_conf) { + ConfFile cf; + std::deque err; + std::ostringstream warn; + int ret = cf.parse_file(fname.c_str(), &err, &warn); + if (ret) { + // error + return ret; + } + + std::string val; + if (!cf.read("global", "server_groups", val)) + g_conf.server_groups = std::stoul(val); + if (!cf.read("global", "client_groups", val)) + g_conf.client_groups = std::stoul(val); + if (!cf.read("global", "server_random_selection", val)) + g_conf.server_random_selection = stobool(val); + if (!cf.read("global", "server_soft_limit", val)) + g_conf.server_soft_limit = stobool(val); ++ if (!cf.read("global", "anticipation_timeout", val)) ++ g_conf.anticipation_timeout = stod(val); + + for (uint i = 0; i < g_conf.server_groups; i++) { + srv_group_t st; + std::string section = "server." + std::to_string(i); + if (!cf.read(section, "server_count", val)) + st.server_count = std::stoul(val); + if (!cf.read(section, "server_iops", val)) + st.server_iops = std::stoul(val); + if (!cf.read(section, "server_threads", val)) + st.server_threads = std::stoul(val); + g_conf.srv_group.push_back(st); + } + + for (uint i = 0; i < g_conf.client_groups; i++) { + cli_group_t ct; + std::string section = "client." 
+ std::to_string(i); + if (!cf.read(section, "client_count", val)) + ct.client_count = std::stoul(val); + if (!cf.read(section, "client_wait", val)) + ct.client_wait = std::chrono::seconds(std::stoul(val)); + if (!cf.read(section, "client_total_ops", val)) + ct.client_total_ops = std::stoul(val); + if (!cf.read(section, "client_server_select_range", val)) + ct.client_server_select_range = std::stoul(val); + if (!cf.read(section, "client_iops_goal", val)) + ct.client_iops_goal = std::stoul(val); + if (!cf.read(section, "client_outstanding_ops", val)) + ct.client_outstanding_ops = std::stoul(val); + if (!cf.read(section, "client_reservation", val)) + ct.client_reservation = std::stod(val); + if (!cf.read(section, "client_limit", val)) + ct.client_limit = std::stod(val); + if (!cf.read(section, "client_weight", val)) + ct.client_weight = std::stod(val); + g_conf.cli_group.push_back(ct); + } + + return 0; +} diff --cc src/dmclock/sim/src/config.h index 010f33a743ea,000000000000..e85c69d07451 mode 100644,000000..100644 --- a/src/dmclock/sim/src/config.h +++ b/src/dmclock/sim/src/config.h @@@ -1,138 -1,0 +1,143 @@@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + + +#pragma once + + +#include + +#include +#include +#include +#include + +#include "ConfUtils.h" + + +namespace crimson { + namespace qos_simulation { + + struct cli_group_t { + uint client_count; + std::chrono::seconds client_wait; + uint client_total_ops; + uint client_server_select_range; + uint client_iops_goal; + uint client_outstanding_ops; + double client_reservation; + double client_limit; + double client_weight; + + cli_group_t(uint _client_count = 100, + uint _client_wait = 0, + uint _client_total_ops = 1000, + uint _client_server_select_range = 10, + uint _client_iops_goal = 50, + uint _client_outstanding_ops = 100, + double _client_reservation = 20.0, + double _client_limit = 60.0, + double _client_weight = 1.0) : + client_count(_client_count), + client_wait(std::chrono::seconds(_client_wait)), + client_total_ops(_client_total_ops), + client_server_select_range(_client_server_select_range), + client_iops_goal(_client_iops_goal), + client_outstanding_ops(_client_outstanding_ops), + client_reservation(_client_reservation), + client_limit(_client_limit), + client_weight(_client_weight) + { + // empty + } + + friend std::ostream& operator<<(std::ostream& out, + const cli_group_t& cli_group) { + out << + "client_count = " << cli_group.client_count << "\n" << + "client_wait = " << cli_group.client_wait.count() << "\n" << + "client_total_ops = " << cli_group.client_total_ops << "\n" << + "client_server_select_range = " << cli_group.client_server_select_range << "\n" << + "client_iops_goal = " << cli_group.client_iops_goal << "\n" << + "client_outstanding_ops = " << cli_group.client_outstanding_ops << "\n" << + std::fixed << std::setprecision(1) << + "client_reservation = " << cli_group.client_reservation << "\n" << + "client_limit = " << cli_group.client_limit << "\n" << + "client_weight = " << cli_group.client_weight; + return out; + } + }; // class cli_group_t + + + struct srv_group_t { + uint server_count; + uint server_iops; + uint server_threads; + + srv_group_t(uint _server_count = 100, + uint _server_iops = 40, + uint _server_threads = 1) : + server_count(_server_count), + server_iops(_server_iops), + server_threads(_server_threads) + { + // empty + } + + friend std::ostream& operator<<(std::ostream& out, + const srv_group_t& srv_group) { + out << + "server_count = " << 
srv_group.server_count << "\n" << + "server_iops = " << srv_group.server_iops << "\n" << + "server_threads = " << srv_group.server_threads; + return out; + } + }; // class srv_group_t + + + struct sim_config_t { + uint server_groups; + uint client_groups; + bool server_random_selection; + bool server_soft_limit; ++ double anticipation_timeout; + + std::vector cli_group; + std::vector srv_group; + + sim_config_t(uint _server_groups = 1, + uint _client_groups = 1, + bool _server_random_selection = false, - bool _server_soft_limit = true) : ++ bool _server_soft_limit = true, ++ double _anticipation_timeout = 0.0) : + server_groups(_server_groups), + client_groups(_client_groups), + server_random_selection(_server_random_selection), - server_soft_limit(_server_soft_limit) ++ server_soft_limit(_server_soft_limit), ++ anticipation_timeout(_anticipation_timeout) + { + srv_group.reserve(server_groups); + cli_group.reserve(client_groups); + } + + friend std::ostream& operator<<(std::ostream& out, + const sim_config_t& sim_config) { + out << + "server_groups = " << sim_config.server_groups << "\n" << + "client_groups = " << sim_config.client_groups << "\n" << + "server_random_selection = " << sim_config.server_random_selection << "\n" << - "server_soft_limit = " << sim_config.server_soft_limit; ++ "server_soft_limit = " << sim_config.server_soft_limit << "\n" << ++ std::fixed << std::setprecision(3) << ++ "anticipation_timeout = " << sim_config.anticipation_timeout; + return out; + } + }; // class sim_config_t + + + bool ceph_argparse_witharg(std::vector &args, + std::vector::iterator &i, std::string *ret, ...); + void ceph_argparse_early_args(std::vector& args, std::string *conf_file_list); + int parse_config_file(const std::string &fname, sim_config_t &g_conf); + + }; // namespace qos_simulation +}; // namespace crimson diff --cc src/dmclock/sim/src/test_dmclock.h index 7f1e55439edc,000000000000..9728b45f935a mode 100644,000000..100644 --- a/src/dmclock/sim/src/test_dmclock.h +++ b/src/dmclock/sim/src/test_dmclock.h @@@ -1,56 -1,0 +1,57 @@@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. 
+ */ + + +#include "dmclock_recs.h" +#include "dmclock_server.h" +#include "dmclock_client.h" + +#include "sim_recs.h" +#include "sim_server.h" +#include "sim_client.h" + +#include "simulate.h" + + +namespace crimson { + namespace test_dmc { + + namespace dmc = crimson::dmclock; + namespace sim = crimson::qos_simulation; + + struct DmcAccum { + uint64_t reservation_count = 0; + uint64_t proportion_count = 0; + }; + + using DmcQueue = dmc::PushPriorityQueue; ++ using DmcServiceTracker = dmc::ServiceTracker; + + using DmcServer = sim::SimulatedServer; + - using DmcClient = sim::SimulatedClient, ++ using DmcClient = sim::SimulatedClient; + + using CreateQueueF = std::function; + + using MySim = sim::Simulation; + + using SubmitFunc = DmcClient::SubmitFunc; + + extern void dmc_server_accumulate_f(DmcAccum& a, + const dmc::PhaseType& phase); + + extern void dmc_client_accumulate_f(DmcAccum& a, + const dmc::PhaseType& phase); + } // namespace test_dmc +} // namespace crimson diff --cc src/dmclock/sim/src/test_dmclock_main.cc index 57b733e860ce,000000000000..f59b735465ba mode 100644,000000..100644 --- a/src/dmclock/sim/src/test_dmclock_main.cc +++ b/src/dmclock/sim/src/test_dmclock_main.cc @@@ -1,324 -1,0 +1,329 @@@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + */ + + +#include "test_dmclock.h" +#include "config.h" + +#ifdef PROFILE +#include "profile.h" +#endif + + +namespace dmc = crimson::dmclock; +namespace test = crimson::test_dmc; +namespace sim = crimson::qos_simulation; + +using namespace std::placeholders; + + +namespace crimson { + namespace test_dmc { + void server_data(std::ostream& out, + test::MySim* sim, + test::MySim::ServerFilter server_disp_filter, + int head_w, int data_w, int data_prec); + + void client_data(std::ostream& out, + test::MySim* sim, + test::MySim::ClientFilter client_disp_filter, + int head_w, int data_w, int data_prec); + } +} + + +int main(int argc, char* argv[]) { + std::vector args; + for (int i = 1; i < argc; ++i) { + args.push_back(argv[i]); + } + + std::string conf_file_list; + sim::ceph_argparse_early_args(args, &conf_file_list); + + sim::sim_config_t g_conf; + std::vector &cli_group = g_conf.cli_group; + std::vector &srv_group = g_conf.srv_group; + + if (!conf_file_list.empty()) { + int ret; + ret = sim::parse_config_file(conf_file_list, g_conf); + if (ret) { + // error + _exit(1); + } + } else { + // default simulation parameter + g_conf.client_groups = 2; + + sim::srv_group_t st; + srv_group.push_back(st); + + sim::cli_group_t ct1(99, 0); + cli_group.push_back(ct1); + + sim::cli_group_t ct2(1, 10); + cli_group.push_back(ct2); + } + + const uint server_groups = g_conf.server_groups; + const uint client_groups = g_conf.client_groups; + const bool server_random_selection = g_conf.server_random_selection; + const bool server_soft_limit = g_conf.server_soft_limit; ++ const double anticipation_timeout = g_conf.anticipation_timeout; + uint server_total_count = 0; + uint client_total_count = 0; + + for (uint i = 0; i < client_groups; ++i) { + client_total_count += cli_group[i].client_count; + } + + for (uint i = 0; i < server_groups; ++i) { + server_total_count += srv_group[i].server_count; + } + + std::vector client_info; + for (uint i = 0; i < client_groups; ++i) { + client_info.push_back(test::dmc::ClientInfo + { cli_group[i].client_reservation, + cli_group[i].client_weight, + cli_group[i].client_limit } ); + } + + auto ret_client_group_f = [&](const ClientId& 
c) -> uint { + uint group_max = 0; + uint i = 0; + for (; i < client_groups; ++i) { + group_max += cli_group[i].client_count; + if (c < group_max) { + break; + } + } + return i; + }; + + auto ret_server_group_f = [&](const ServerId& s) -> uint { + uint group_max = 0; + uint i = 0; + for (; i < server_groups; ++i) { + group_max += srv_group[i].server_count; + if (s < group_max) { + break; + } + } + return i; + }; + + auto client_info_f = [=](const ClientId& c) -> test::dmc::ClientInfo { + return client_info[ret_client_group_f(c)]; + }; + + auto client_disp_filter = [=] (const ClientId& i) -> bool { + return i < 3 || i >= (client_total_count - 3); + }; + + auto server_disp_filter = [=] (const ServerId& i) -> bool { + return i < 3 || i >= (server_total_count - 3); + }; + + + test::MySim *simulation; + + + // lambda to post a request to the identified server; called by client + test::SubmitFunc server_post_f = + [&simulation](const ServerId& server, + sim::TestRequest&& request, + const ClientId& client_id, + const test::dmc::ReqParams& req_params) { + test::DmcServer& s = simulation->get_server(server); + s.post(std::move(request), client_id, req_params); + }; + + std::vector> cli_inst; + for (uint i = 0; i < client_groups; ++i) { + if (cli_group[i].client_wait == std::chrono::seconds(0)) { + cli_inst.push_back( + { { sim::req_op, + (uint32_t)cli_group[i].client_total_ops, + (double)cli_group[i].client_iops_goal, + (uint16_t)cli_group[i].client_outstanding_ops } } ); + } else { + cli_inst.push_back( + { { sim::wait_op, cli_group[i].client_wait }, + { sim::req_op, + (uint32_t)cli_group[i].client_total_ops, + (double)cli_group[i].client_iops_goal, + (uint16_t)cli_group[i].client_outstanding_ops } } ); + } + } + + simulation = new test::MySim(); + + test::DmcServer::ClientRespFunc client_response_f = + [&simulation](ClientId client_id, + const sim::TestResponse& resp, + const ServerId& server_id, + const dmc::PhaseType& phase) { + simulation->get_client(client_id).receive_response(resp, + server_id, + phase); + }; + + test::CreateQueueF create_queue_f = + [&](test::DmcQueue::CanHandleRequestFunc can_f, + test::DmcQueue::HandleRequestFunc handle_f) -> test::DmcQueue* { - return new test::DmcQueue(client_info_f, can_f, handle_f, server_soft_limit); ++ return new test::DmcQueue(client_info_f, ++ can_f, ++ handle_f, ++ server_soft_limit, ++ anticipation_timeout); + }; + + + auto create_server_f = [&](ServerId id) -> test::DmcServer* { + uint i = ret_server_group_f(id); + return new test::DmcServer(id, + srv_group[i].server_iops, + srv_group[i].server_threads, + client_response_f, + test::dmc_server_accumulate_f, + create_queue_f); + }; + + auto create_client_f = [&](ClientId id) -> test::DmcClient* { + uint i = ret_client_group_f(id); + test::MySim::ClientBasedServerSelectFunc server_select_f; + uint client_server_select_range = cli_group[i].client_server_select_range; + if (!server_random_selection) { + server_select_f = simulation->make_server_select_alt_range(client_server_select_range); + } else { + server_select_f = simulation->make_server_select_ran_range(client_server_select_range); + } + return new test::DmcClient(id, + server_post_f, + std::bind(server_select_f, _1, id), + test::dmc_client_accumulate_f, + cli_inst[i]); + }; + +#if 1 + std::cout << "[global]" << std::endl << g_conf << std::endl; + for (uint i = 0; i < client_groups; ++i) { + std::cout << std::endl << "[client." 
<< i << "]" << std::endl; + std::cout << cli_group[i] << std::endl; + } + for (uint i = 0; i < server_groups; ++i) { + std::cout << std::endl << "[server." << i << "]" << std::endl; + std::cout << srv_group[i] << std::endl; + } + std::cout << std::endl; +#endif + + simulation->add_servers(server_total_count, create_server_f); + simulation->add_clients(client_total_count, create_client_f); + + simulation->run(); + simulation->display_stats(std::cout, + &test::server_data, &test::client_data, + server_disp_filter, client_disp_filter); + + delete simulation; +} // main + + +void test::client_data(std::ostream& out, - test::MySim* sim, - test::MySim::ClientFilter client_disp_filter, - int head_w, int data_w, int data_prec) { ++ test::MySim* sim, ++ test::MySim::ClientFilter client_disp_filter, ++ int head_w, int data_w, int data_prec) { + // report how many ops were done by reservation and proportion for + // each client + + int total_r = 0; + out << std::setw(head_w) << "res_ops:"; + for (uint i = 0; i < sim->get_client_count(); ++i) { + const auto& client = sim->get_client(i); + auto r = client.get_accumulator().reservation_count; + total_r += r; + if (!client_disp_filter(i)) continue; + out << " " << std::setw(data_w) << r; + } + out << " " << std::setw(data_w) << std::setprecision(data_prec) << + std::fixed << total_r << std::endl; + + int total_p = 0; + out << std::setw(head_w) << "prop_ops:"; + for (uint i = 0; i < sim->get_client_count(); ++i) { + const auto& client = sim->get_client(i); + auto p = client.get_accumulator().proportion_count; + total_p += p; + if (!client_disp_filter(i)) continue; + out << " " << std::setw(data_w) << p; + } + out << " " << std::setw(data_w) << std::setprecision(data_prec) << + std::fixed << total_p << std::endl; +} + + +void test::server_data(std::ostream& out, - test::MySim* sim, - test::MySim::ServerFilter server_disp_filter, - int head_w, int data_w, int data_prec) { ++ test::MySim* sim, ++ test::MySim::ServerFilter server_disp_filter, ++ int head_w, int data_w, int data_prec) { + out << std::setw(head_w) << "res_ops:"; + int total_r = 0; + for (uint i = 0; i < sim->get_server_count(); ++i) { + const auto& server = sim->get_server(i); + auto rc = server.get_accumulator().reservation_count; + total_r += rc; + if (!server_disp_filter(i)) continue; + out << " " << std::setw(data_w) << rc; + } + out << " " << std::setw(data_w) << std::setprecision(data_prec) << + std::fixed << total_r << std::endl; + + out << std::setw(head_w) << "prop_ops:"; + int total_p = 0; + for (uint i = 0; i < sim->get_server_count(); ++i) { + const auto& server = sim->get_server(i); + auto pc = server.get_accumulator().proportion_count; + total_p += pc; + if (!server_disp_filter(i)) continue; + out << " " << std::setw(data_w) << pc; + } + out << " " << std::setw(data_w) << std::setprecision(data_prec) << + std::fixed << total_p << std::endl; + + const auto& q = sim->get_server(0).get_priority_queue(); + out << std::endl << + " k-way heap: " << q.get_heap_branching_factor() << std::endl + << std::endl; + +#ifdef PROFILE + crimson::ProfileCombiner art_combiner; + crimson::ProfileCombiner rct_combiner; + for (uint i = 0; i < sim->get_server_count(); ++i) { + const auto& q = sim->get_server(i).get_priority_queue(); + const auto& art = q.add_request_timer; + art_combiner.combine(art); + const auto& rct = q.request_complete_timer; + rct_combiner.combine(rct); + } + out << "Server add_request_timer: count:" << art_combiner.get_count() << + ", mean:" << art_combiner.get_mean() << + ", 
std_dev:" << art_combiner.get_std_dev() << + ", low:" << art_combiner.get_low() << + ", high:" << art_combiner.get_high() << std::endl; + out << "Server request_complete_timer: count:" << rct_combiner.get_count() << + ", mean:" << rct_combiner.get_mean() << + ", std_dev:" << rct_combiner.get_std_dev() << + ", low:" << rct_combiner.get_low() << + ", high:" << rct_combiner.get_high() << std::endl; + out << "Server combined mean: " << + (art_combiner.get_mean() + rct_combiner.get_mean()) << + std::endl; +#endif +} diff --cc src/dmclock/src/dmclock_client.h index 92f4cf83bb1a,000000000000..e0280ab311c1 mode 100644,000000..100644 --- a/src/dmclock/src/dmclock_client.h +++ b/src/dmclock/src/dmclock_client.h @@@ -1,193 -1,0 +1,274 @@@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2017 Red Hat Inc. + */ + + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "run_every.h" +#include "dmclock_util.h" +#include "dmclock_recs.h" + + +namespace crimson { + namespace dmclock { - struct ServerInfo { ++ ++ // OrigTracker is a best-effort implementation of the the original ++ // dmClock calculations of delta and rho. It adheres to an ++ // interface, implemented via a template type, that allows it to ++ // be replaced with an alternative. The interface consists of the ++ // static create, prepare_req, resp_update, and get_last_delta ++ // functions. ++ class OrigTracker { + Counter delta_prev_req; + Counter rho_prev_req; + uint32_t my_delta; + uint32_t my_rho; + - ServerInfo(Counter _delta_prev_req, - Counter _rho_prev_req) : - delta_prev_req(_delta_prev_req), - rho_prev_req(_rho_prev_req), ++ public: ++ ++ OrigTracker(Counter global_delta, ++ Counter global_rho) : ++ delta_prev_req(global_delta), ++ rho_prev_req(global_rho), + my_delta(0), + my_rho(0) - { - // empty ++ { /* empty */ } ++ ++ static inline OrigTracker create(Counter the_delta, Counter the_rho) { ++ return OrigTracker(the_delta, the_rho); + } + - inline void req_update(Counter delta, Counter rho) { - delta_prev_req = delta; - rho_prev_req = rho; ++ inline ReqParams prepare_req(Counter& the_delta, Counter& the_rho) { ++ Counter delta_out = 1 + the_delta - delta_prev_req - my_delta; ++ Counter rho_out = 1 + the_rho - rho_prev_req - my_rho; ++ delta_prev_req = the_delta; ++ rho_prev_req = the_rho; + my_delta = 0; + my_rho = 0; ++ return ReqParams(uint32_t(delta_out), uint32_t(rho_out)); + } + - inline void resp_update(PhaseType phase) { ++ inline void resp_update(PhaseType phase, ++ Counter& the_delta, ++ Counter& the_rho) { ++ ++the_delta; + ++my_delta; - if (phase == PhaseType::reservation) ++my_rho; ++ if (phase == PhaseType::reservation) { ++ ++the_rho; ++ ++my_rho; ++ } ++ } ++ ++ inline Counter get_last_delta() const { ++ return delta_prev_req; + } - }; ++ }; // struct OrigTracker ++ ++ ++ // BorrowingTracker always returns a positive delta and rho. If ++ // not enough responses have come in to allow that, we will borrow ++ // a future response and repay it later. 
++ class BorrowingTracker { ++ Counter delta_prev_req; ++ Counter rho_prev_req; ++ Counter delta_borrow; ++ Counter rho_borrow; ++ ++ public: ++ ++ BorrowingTracker(Counter global_delta, Counter global_rho) : ++ delta_prev_req(global_delta), ++ rho_prev_req(global_rho), ++ delta_borrow(0), ++ rho_borrow(0) ++ { /* empty */ } ++ ++ static inline BorrowingTracker create(Counter the_delta, ++ Counter the_rho) { ++ return BorrowingTracker(the_delta, the_rho); ++ } ++ ++ inline Counter calc_with_borrow(const Counter& global, ++ const Counter& previous, ++ Counter& borrow) { ++ Counter result = global - previous; ++ if (0 == result) { ++ // if no replies have come in, borrow one from the future ++ ++borrow; ++ return 1; ++ } else if (result > borrow) { ++ // if we can give back all of what we borrowed, do so ++ result -= borrow; ++ borrow = 0; ++ return result; ++ } else { ++ // can only return part of what was borrowed in order to ++ // return positive ++ borrow = borrow - result + 1; ++ return 1; ++ } ++ } ++ ++ inline ReqParams prepare_req(Counter& the_delta, Counter& the_rho) { ++ Counter delta_out = ++ calc_with_borrow(the_delta, delta_prev_req, delta_borrow); ++ Counter rho_out = ++ calc_with_borrow(the_rho, rho_prev_req, rho_borrow); ++ delta_prev_req = the_delta; ++ rho_prev_req = the_rho; ++ return ReqParams(uint32_t(delta_out), uint32_t(rho_out)); ++ } ++ ++ inline void resp_update(PhaseType phase, ++ Counter& the_delta, ++ Counter& the_rho) { ++ ++the_delta; ++ if (phase == PhaseType::reservation) { ++ ++the_rho; ++ } ++ } ++ ++ inline Counter get_last_delta() const { ++ return delta_prev_req; ++ } ++ }; // struct BorrowingTracker + + + // S is server identifier type - template ++ // T is the server info class that adheres to ServerTrackerIfc interface ++ template + class ServiceTracker { + // we don't want to include gtest.h just for FRIEND_TEST + friend class dmclock_client_server_erase_Test; + + using TimePoint = decltype(std::chrono::steady_clock::now()); + using Duration = std::chrono::milliseconds; + using MarkPoint = std::pair; + + Counter delta_counter; // # reqs completed + Counter rho_counter; // # reqs completed via reservation - std::map server_map; ++ std::map server_map; + mutable std::mutex data_mtx; // protects Counters and map + + using DataGuard = std::lock_guard; + + // clean config + + std::deque clean_mark_points; - Duration clean_age; // age at which ServerInfo cleaned ++ Duration clean_age; // age at which server tracker cleaned + + // NB: All threads declared at end, so they're destructed firs! + + std::unique_ptr cleaning_job; + + + public: + + // we have to start the counters at 1, as 0 is used in the + // cleaning process + template + ServiceTracker(std::chrono::duration _clean_every, + std::chrono::duration _clean_age) : + delta_counter(1), + rho_counter(1), + clean_age(std::chrono::duration_cast(_clean_age)) + { + cleaning_job = + std::unique_ptr( + new RunEvery(_clean_every, + std::bind(&ServiceTracker::do_clean, this))); + } + + + // the reason we're overloading the constructor rather than + // using default values for the arguments is so that callers + // have to either use all defaults or specify all timings; with + // default arguments they could specify some without others + ServiceTracker() : + ServiceTracker(std::chrono::minutes(5), std::chrono::minutes(10)) + { + // empty + } + + + /* + * Incorporates the RespParams received into the various counter. 
+ */ + void track_resp(const S& server_id, const PhaseType& phase) { + DataGuard g(data_mtx); + + auto it = server_map.find(server_id); + if (server_map.end() == it) { + // this code can only run if a request did not precede the + // response or if the record was cleaned up b/w when + // the request was made and now - ServerInfo si(delta_counter, rho_counter); - si.resp_update(phase); - server_map.emplace(server_id, si); - } else { - it->second.resp_update(phase); - } - - ++delta_counter; - if (PhaseType::reservation == phase) { - ++rho_counter; ++ auto i = server_map.emplace(server_id, ++ T::create(delta_counter, rho_counter)); ++ it = i.first; + } ++ it->second.resp_update(phase, delta_counter, rho_counter); + } + - + /* + * Returns the ReqParams for the given server. + */ + ReqParams get_req_params(const S& server) { + DataGuard g(data_mtx); + auto it = server_map.find(server); + if (server_map.end() == it) { - server_map.emplace(server, ServerInfo(delta_counter, rho_counter)); ++ server_map.emplace(server, ++ T::create(delta_counter, rho_counter)); + return ReqParams(1, 1); + } else { - Counter delta = - 1 + delta_counter - it->second.delta_prev_req - it->second.my_delta; - Counter rho = - 1 + rho_counter - it->second.rho_prev_req - it->second.my_rho; - - it->second.req_update(delta_counter, rho_counter); - - return ReqParams(uint32_t(delta), uint32_t(rho)); ++ return it->second.prepare_req(delta_counter, rho_counter); + } + } + + private: + + /* + * This is being called regularly by RunEvery. Every time it's + * called it notes the time and delta counter (mark point) in a + * deque. It also looks at the deque to find the most recent + * mark point that is older than clean_age. It then walks the + * map and delete all server entries that were last used before + * that mark point. + */ + void do_clean() { + TimePoint now = std::chrono::steady_clock::now(); + DataGuard g(data_mtx); + clean_mark_points.emplace_back(MarkPoint(now, delta_counter)); + + Counter earliest = 0; + auto point = clean_mark_points.front(); + while (point.first <= now - clean_age) { + earliest = point.second; + clean_mark_points.pop_front(); + point = clean_mark_points.front(); + } + + if (earliest > 0) { + for (auto i = server_map.begin(); + i != server_map.end(); + /* empty */) { + auto i2 = i++; - if (i2->second.delta_prev_req <= earliest) { ++ if (i2->second.get_last_delta() <= earliest) { + server_map.erase(i2); + } + } + } + } // do_clean + }; // class ServiceTracker + } +} diff --cc src/dmclock/src/dmclock_server.h index aac848746c0c,000000000000..8f0e1925e05c mode 100644,000000..100644 --- a/src/dmclock/src/dmclock_server.h +++ b/src/dmclock/src/dmclock_server.h @@@ -1,1629 -1,0 +1,1652 @@@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2017 Red Hat Inc. + */ + + +#pragma once + +/* COMPILATION OPTIONS + * + * By default we include an optimization over the originally published + * dmclock algorithm using not the values of rho and delta that were + * sent in with a request but instead the most recent rho and delta + * values from the requests's client. To restore the algorithm's + * original behavior, define DO_NOT_DELAY_TAG_CALC (i.e., compiler + * argument -DDO_NOT_DELAY_TAG_CALC). + * + * The prop_heap does not seem to be necessary. The only thing it + * would help with is quickly finding the mininum proportion/prioity + * when an idle client became active. 
To have the code maintain the + * proportional heap, define USE_PROP_HEAP (i.e., compiler argument + * -DUSE_PROP_HEAP). + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "indirect_intrusive_heap.h" +#include "run_every.h" +#include "dmclock_util.h" +#include "dmclock_recs.h" + +#ifdef PROFILE +#include "profile.h" +#endif + + +namespace crimson { + + namespace dmclock { + + namespace c = crimson; + + constexpr double max_tag = std::numeric_limits::is_iec559 ? + std::numeric_limits::infinity() : + std::numeric_limits::max(); + constexpr double min_tag = std::numeric_limits::is_iec559 ? + -std::numeric_limits::infinity() : + std::numeric_limits::lowest(); + constexpr uint tag_modulo = 1000000; + + struct ClientInfo { + double reservation; // minimum + double weight; // proportional + double limit; // maximum + + // multiplicative inverses of above, which we use in calculations + // and don't want to recalculate repeatedly + double reservation_inv; + double weight_inv; + double limit_inv; + + // order parameters -- min, "normal", max + ClientInfo(double _reservation, double _weight, double _limit) : + reservation(_reservation), + weight(_weight), + limit(_limit), + reservation_inv(0.0 == reservation ? 0.0 : 1.0 / reservation), + weight_inv( 0.0 == weight ? 0.0 : 1.0 / weight), + limit_inv( 0.0 == limit ? 0.0 : 1.0 / limit) + { + // empty + } + + + friend std::ostream& operator<<(std::ostream& out, + const ClientInfo& client) { + out << + "{ ClientInfo:: r:" << client.reservation << + " w:" << std::fixed << client.weight << + " l:" << std::fixed << client.limit << + " 1/r:" << std::fixed << client.reservation_inv << + " 1/w:" << std::fixed << client.weight_inv << + " 1/l:" << std::fixed << client.limit_inv << + " }"; + return out; + } + }; // class ClientInfo + + + struct RequestTag { + double reservation; + double proportion; + double limit; + bool ready; // true when within limit - #ifndef DO_NOT_DELAY_TAG_CALC + Time arrival; - #endif + + RequestTag(const RequestTag& prev_tag, + const ClientInfo& client, + const uint32_t delta, + const uint32_t rho, + const Time time, - const double cost = 0.0) : - reservation(cost + tag_calc(time, - prev_tag.reservation, - client.reservation_inv, - rho, - true)), - proportion(tag_calc(time, - prev_tag.proportion, - client.weight_inv, - delta, - true)), - limit(tag_calc(time, - prev_tag.limit, - client.limit_inv, - delta, - false)), - ready(false) - #ifndef DO_NOT_DELAY_TAG_CALC - , arrival(time) - #endif ++ const double cost = 0.0, ++ const double anticipation_timeout = 0.0) : ++ ready(false), ++ arrival(time) + { ++ Time max_time = time; ++ if (time - anticipation_timeout < prev_tag.arrival) ++ max_time -= anticipation_timeout; ++ ++ reservation = cost + tag_calc(max_time, ++ prev_tag.reservation, ++ client.reservation_inv, ++ rho, ++ true); ++ proportion = tag_calc(max_time, ++ prev_tag.proportion, ++ client.weight_inv, ++ delta, ++ true); ++ limit = tag_calc(max_time, ++ prev_tag.limit, ++ client.limit_inv, ++ delta, ++ false); ++ + assert(reservation < max_tag || proportion < max_tag); + } + + RequestTag(const RequestTag& prev_tag, + const ClientInfo& client, + const ReqParams req_params, + const Time time, - const double cost = 0.0) : - RequestTag(prev_tag, client, req_params.delta, req_params.rho, time, cost) ++ const double cost = 0.0, ++ const double anticipation_timeout = 0.0) : ++ RequestTag(prev_tag, client, req_params.delta, 
req_params.rho, time, ++ cost, anticipation_timeout) + { /* empty */ } + + RequestTag(double _res, double _prop, double _lim, const Time _arrival) : + reservation(_res), + proportion(_prop), + limit(_lim), - ready(false) - #ifndef DO_NOT_DELAY_TAG_CALC - , arrival(_arrival) - #endif ++ ready(false), ++ arrival(_arrival) + { + assert(reservation < max_tag || proportion < max_tag); + } + + RequestTag(const RequestTag& other) : + reservation(other.reservation), + proportion(other.proportion), + limit(other.limit), - ready(other.ready) - #ifndef DO_NOT_DELAY_TAG_CALC - , arrival(other.arrival) - #endif ++ ready(other.ready), ++ arrival(other.arrival) + { + // empty + } + + static std::string format_tag_change(double before, double after) { + if (before == after) { + return std::string("same"); + } else { + std::stringstream ss; + ss << format_tag(before) << "=>" << format_tag(after); + return ss.str(); + } + } + + static std::string format_tag(double value) { + if (max_tag == value) { + return std::string("max"); + } else if (min_tag == value) { + return std::string("min"); + } else { + return format_time(value, tag_modulo); + } + } + + private: + + static double tag_calc(const Time time, + double prev, + double increment, + uint32_t dist_req_val, + bool extreme_is_high) { + if (0.0 == increment) { + return extreme_is_high ? max_tag : min_tag; + } else { + if (0 != dist_req_val) { + increment *= dist_req_val; + } + return std::max(time, prev + increment); + } + } + + friend std::ostream& operator<<(std::ostream& out, + const RequestTag& tag) { + out << + "{ RequestTag:: ready:" << (tag.ready ? "true" : "false") << + " r:" << format_tag(tag.reservation) << + " p:" << format_tag(tag.proportion) << + " l:" << format_tag(tag.limit) << +#if 0 // try to resolve this to make sure Time is operator<<'able. +#ifndef DO_NOT_DELAY_TAG_CALC + " arrival:" << tag.arrival << +#endif +#endif + " }"; + return out; + } + }; // class RequestTag + + + // C is client identifier type, R is request type, + // U1 determines whether to use client information function dynamically, + // B is heap branching factor + template + class PriorityQueueBase { + // we don't want to include gtest.h just for FRIEND_TEST + friend class dmclock_server_client_idle_erase_Test; + + public: + + using RequestRef = std::unique_ptr; + + protected: + + using TimePoint = decltype(std::chrono::steady_clock::now()); + using Duration = std::chrono::milliseconds; + using MarkPoint = std::pair; + + enum class ReadyOption {ignore, lowers, raises}; + + // forward decl for friend decls + template + struct ClientCompare; + + class ClientReq { + friend PriorityQueueBase; + + RequestTag tag; + C client_id; + RequestRef request; + + public: + + ClientReq(const RequestTag& _tag, + const C& _client_id, + RequestRef&& _request) : + tag(_tag), + client_id(_client_id), + request(std::move(_request)) + { + // empty + } + + friend std::ostream& operator<<(std::ostream& out, const ClientReq& c) { + out << "{ ClientReq:: tag:" << c.tag << " client:" << + c.client_id << " }"; + return out; + } + }; // class ClientReq + + public: + + // NOTE: ClientRec is in the "public" section for compatibility + // with g++ 4.8.4, which complains if it's not. By g++ 6.3.1 + // ClientRec could be "protected" with no issue. [See comments + // associated with function submit_top_request.] 
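Returning to the RequestTag constructor changes above: the new anticipation_timeout parameter pulls the effective arrival time backwards whenever a request arrives within anticipation_timeout of its predecessor, so the max(time, prev + increment) step in tag_calc is evaluated against a slightly earlier time. A minimal sketch of just that adjustment, for illustration only, with Time simplified to a double and the function name invented:

using Time = double;

// Effective time used for tag calculation, mirroring the max_time logic in
// the RequestTag constructor: if this request arrived within
// anticipation_timeout of the previous one, compute its tags as if it had
// arrived anticipation_timeout earlier.
Time effective_time(Time arrival, Time prev_arrival,
                    double anticipation_timeout) {
  if (arrival - anticipation_timeout < prev_arrival) {
    return arrival - anticipation_timeout;
  }
  return arrival;
}

// The tags themselves are then computed as before, e.g.
//   reservation = cost + max(effective_time, prev.reservation + rho * reservation_inv)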
+ class ClientRec { + friend PriorityQueueBase; + + C client; + RequestTag prev_tag; + std::deque requests; + + // amount added from the proportion tag as a result of + // an idle client becoming unidle + double prop_delta = 0.0; + - c::IndIntruHeapData reserv_heap_data; - c::IndIntruHeapData lim_heap_data; - c::IndIntruHeapData ready_heap_data; ++ c::IndIntruHeapData reserv_heap_data {}; ++ c::IndIntruHeapData lim_heap_data {}; ++ c::IndIntruHeapData ready_heap_data {}; +#if USE_PROP_HEAP - c::IndIntruHeapData prop_heap_data; ++ c::IndIntruHeapData prop_heap_data {}; +#endif + + public: + + ClientInfo info; + bool idle; + Counter last_tick; + uint32_t cur_rho; + uint32_t cur_delta; + + ClientRec(C _client, + const ClientInfo& _info, + Counter current_tick) : + client(_client), + prev_tag(0.0, 0.0, 0.0, TimeZero), + info(_info), + idle(true), + last_tick(current_tick), + cur_rho(1), + cur_delta(1) + { + // empty + } + + inline const RequestTag& get_req_tag() const { + return prev_tag; + } + + static inline void assign_unpinned_tag(double& lhs, const double rhs) { + if (rhs != max_tag && rhs != min_tag) { + lhs = rhs; + } + } + + inline void update_req_tag(const RequestTag& _prev, + const Counter& _tick) { + assign_unpinned_tag(prev_tag.reservation, _prev.reservation); + assign_unpinned_tag(prev_tag.limit, _prev.limit); + assign_unpinned_tag(prev_tag.proportion, _prev.proportion); ++ prev_tag.arrival = _prev.arrival; + last_tick = _tick; + } + + inline void add_request(const RequestTag& tag, + const C& client_id, + RequestRef&& request) { + requests.emplace_back(ClientReq(tag, client_id, std::move(request))); + } + + inline const ClientReq& next_request() const { + return requests.front(); + } + + inline ClientReq& next_request() { + return requests.front(); + } + + inline void pop_request() { + requests.pop_front(); + } + + inline bool has_request() const { + return !requests.empty(); + } + + inline size_t request_count() const { + return requests.size(); + } + + // NB: because a deque is the underlying structure, this + // operation might be expensive + bool remove_by_req_filter_fw(std::function filter_accum) { + bool any_removed = false; + for (auto i = requests.begin(); + i != requests.end(); + /* no inc */) { + if (filter_accum(std::move(*i->request))) { + any_removed = true; + i = requests.erase(i); + } else { + ++i; + } + } + return any_removed; + } + + // NB: because a deque is the underlying structure, this + // operation might be expensive + bool remove_by_req_filter_bw(std::function filter_accum) { + bool any_removed = false; + for (auto i = requests.rbegin(); + i != requests.rend(); + /* no inc */) { + if (filter_accum(std::move(*i->request))) { + any_removed = true; + i = decltype(i){ requests.erase(std::next(i).base()) }; + } else { + ++i; + } + } + return any_removed; + } + + inline bool + remove_by_req_filter(std::function filter_accum, + bool visit_backwards) { + if (visit_backwards) { + return remove_by_req_filter_bw(filter_accum); + } else { + return remove_by_req_filter_fw(filter_accum); + } + } + + friend std::ostream& + operator<<(std::ostream& out, + const typename PriorityQueueBase::ClientRec& e) { + out << "{ ClientRec::" << + " client:" << e.client << + " prev_tag:" << e.prev_tag << + " req_count:" << e.requests.size() << + " top_req:"; + if (e.has_request()) { + out << e.next_request(); + } else { + out << "none"; + } + out << " }"; + + return out; + } + }; // class ClientRec + + using ClientRecRef = std::shared_ptr; + + // when we try to get the next request, 
we'll be in one of three + // situations -- we'll have one to return, have one that can + // fire in the future, or not have any + enum class NextReqType { returning, future, none }; + + // specifies which queue next request will get popped from + enum class HeapId { reservation, ready }; + + // this is returned from next_req to tell the caller the situation + struct NextReq { + NextReqType type; + union { + HeapId heap_id; + Time when_ready; + }; ++ ++ inline explicit NextReq() : ++ type(NextReqType::none) ++ { } ++ ++ inline NextReq(HeapId _heap_id) : ++ type(NextReqType::returning), ++ heap_id(_heap_id) ++ { } ++ ++ inline NextReq(Time _when_ready) : ++ type(NextReqType::future), ++ when_ready(_when_ready) ++ { } ++ ++ // calls to this are clearer than calls to the default ++ // constructor ++ static inline NextReq none() { ++ return NextReq(); ++ } + }; + + + // a function that can be called to look up client information + using ClientInfoFunc = std::function; + + + bool empty() const { + DataGuard g(data_mtx); + return (resv_heap.empty() || ! resv_heap.top().has_request()); + } + + + size_t client_count() const { + DataGuard g(data_mtx); + return resv_heap.size(); + } + + + size_t request_count() const { + DataGuard g(data_mtx); + size_t total = 0; + for (auto i = resv_heap.cbegin(); i != resv_heap.cend(); ++i) { + total += i->request_count(); + } + return total; + } + + + bool remove_by_req_filter(std::function filter_accum, + bool visit_backwards = false) { + bool any_removed = false; + DataGuard g(data_mtx); + for (auto i : client_map) { + bool modified = + i.second->remove_by_req_filter(filter_accum, visit_backwards); + if (modified) { + resv_heap.adjust(*i.second); + limit_heap.adjust(*i.second); + ready_heap.adjust(*i.second); +#if USE_PROP_HEAP + prop_heap.adjust(*i.second); +#endif + any_removed = true; + } + } + return any_removed; + } + + + // use as a default value when no accumulator is provide + static void request_sink(R&& req) { + // do nothing + } + + + void remove_by_client(const C& client, + bool reverse = false, + std::function accum = request_sink) { + DataGuard g(data_mtx); + + auto i = client_map.find(client); + + if (i == client_map.end()) return; + + if (reverse) { + for (auto j = i->second->requests.rbegin(); + j != i->second->requests.rend(); + ++j) { + accum(std::move(*j->request)); + } + } else { + for (auto j = i->second->requests.begin(); + j != i->second->requests.end(); + ++j) { + accum(std::move(*j->request)); + } + } + + i->second->requests.clear(); + + resv_heap.adjust(*i->second); + limit_heap.adjust(*i->second); + ready_heap.adjust(*i->second); +#if USE_PROP_HEAP + prop_heap.adjust(*i->second); +#endif + } + + + uint get_heap_branching_factor() const { + return B; + } + + + void update_client_info(const C& client_id) { + DataGuard g(data_mtx); + auto client_it = client_map.find(client_id); + if (client_map.end() != client_it) { + ClientRec& client = (*client_it->second); + client.info = client_info_f(client_id); + } + } + + + void update_client_infos() { + DataGuard g(data_mtx); + for (auto i : client_map) { + i.second->info = client_info_f(i.second->client); + } + } + + + friend std::ostream& operator<<(std::ostream& out, + const PriorityQueueBase& q) { + std::lock_guard guard(q.data_mtx); + + out << "{ PriorityQueue::"; + for (const auto& c : q.client_map) { + out << " { client:" << c.first << ", record:" << *c.second << + " }"; + } + if (!q.resv_heap.empty()) { + const auto& resv = q.resv_heap.top(); + out << " { reservation_top:" << resv << " 
}"; + const auto& ready = q.ready_heap.top(); + out << " { ready_top:" << ready << " }"; + const auto& limit = q.limit_heap.top(); + out << " { limit_top:" << limit << " }"; + } else { + out << " HEAPS-EMPTY"; + } + out << " }"; + + return out; + } + + // for debugging + void display_queues(std::ostream& out, + bool show_res = true, + bool show_lim = true, + bool show_ready = true, + bool show_prop = true) const { + auto filter = [](const ClientRec& e)->bool { return true; }; + DataGuard g(data_mtx); + if (show_res) { + resv_heap.display_sorted(out << "RESER:", filter); + } + if (show_lim) { + limit_heap.display_sorted(out << "LIMIT:", filter); + } + if (show_ready) { + ready_heap.display_sorted(out << "READY:", filter); + } +#if USE_PROP_HEAP + if (show_prop) { + prop_heap.display_sorted(out << "PROPO:", filter); + } +#endif + } // display_queues + + + protected: + + // The ClientCompare functor is essentially doing a precedes? + // operator, returning true if and only if the first parameter + // must precede the second parameter. If the second must precede + // the first, or if they are equivalent, false should be + // returned. The reason for this behavior is that it will be + // called to test if two items are out of order and if true is + // returned it will reverse the items. Therefore false is the + // default return when it doesn't matter to prevent unnecessary + // re-ordering. + // + // The template is supporting variations in sorting based on the + // heap in question and allowing these variations to be handled + // at compile-time. + // + // tag_field determines which tag is being used for comparison + // + // ready_opt determines how the ready flag influences the sort + // + // use_prop_delta determines whether the proportional delta is + // added in for comparison + template + struct ClientCompare { + bool operator()(const ClientRec& n1, const ClientRec& n2) const { + if (n1.has_request()) { + if (n2.has_request()) { + const auto& t1 = n1.next_request().tag; + const auto& t2 = n2.next_request().tag; + if (ReadyOption::ignore == ready_opt || t1.ready == t2.ready) { + // if we don't care about ready or the ready values are the same + if (use_prop_delta) { + return (t1.*tag_field + n1.prop_delta) < + (t2.*tag_field + n2.prop_delta); + } else { + return t1.*tag_field < t2.*tag_field; + } + } else if (ReadyOption::raises == ready_opt) { + // use_ready == true && the ready fields are different + return t1.ready; + } else { + return t2.ready; + } + } else { + // n1 has request but n2 does not + return true; + } + } else if (n2.has_request()) { + // n2 has request but n1 does not + return false; + } else { + // both have none; keep stable w false + return false; + } + } + }; + + ClientInfoFunc client_info_f; + static constexpr bool is_dynamic_cli_info_f = U1; + + mutable std::mutex data_mtx; + using DataGuard = std::lock_guard; + + // stable mapping between client ids and client queues + std::map client_map; + + c::IndIntruHeap, + B> resv_heap; +#if USE_PROP_HEAP + c::IndIntruHeap, + B> prop_heap; +#endif + c::IndIntruHeap, + B> limit_heap; + c::IndIntruHeap, + B> ready_heap; + + // if all reservations are met and all other requestes are under + // limit, this will allow the request next in terms of + // proportion to still get issued + bool allow_limit_break; ++ double anticipation_timeout; + + std::atomic_bool finishing; + + // every request creates a tick + Counter tick = 0; + + // performance data collection + size_t reserv_sched_count = 0; + size_t prop_sched_count = 0; + size_t 
limit_break_sched_count = 0; + + Duration idle_age; + Duration erase_age; + Duration check_time; + std::deque clean_mark_points; + + // NB: All threads declared at end, so they're destructed first! + + std::unique_ptr cleaning_job; + + + // COMMON constructor that others feed into; we can accept three + // different variations of durations + template + PriorityQueueBase(ClientInfoFunc _client_info_f, + std::chrono::duration _idle_age, + std::chrono::duration _erase_age, + std::chrono::duration _check_time, - bool _allow_limit_break) : ++ bool _allow_limit_break, ++ double _anticipation_timeout) : + client_info_f(_client_info_f), + allow_limit_break(_allow_limit_break), ++ anticipation_timeout(_anticipation_timeout), + finishing(false), + idle_age(std::chrono::duration_cast(_idle_age)), + erase_age(std::chrono::duration_cast(_erase_age)), + check_time(std::chrono::duration_cast(_check_time)) + { + assert(_erase_age >= _idle_age); + assert(_check_time < _idle_age); + cleaning_job = + std::unique_ptr( + new RunEvery(check_time, + std::bind(&PriorityQueueBase::do_clean, this))); + } + + + ~PriorityQueueBase() { + finishing = true; + } + + + inline const ClientInfo get_cli_info(ClientRec& client) const { + if (is_dynamic_cli_info_f) { + client.info = client_info_f(client.client); + } + return client.info; + } + + + // data_mtx must be held by caller + void do_add_request(RequestRef&& request, + const C& client_id, + const ReqParams& req_params, + const Time time, + const double cost = 0.0) { + ++tick; + + // this pointer will help us create a reference to a shared + // pointer, no matter which of two codepaths we take + ClientRec* temp_client; + + auto client_it = client_map.find(client_id); + if (client_map.end() != client_it) { + temp_client = &(*client_it->second); // address of obj of shared_ptr + } else { + ClientInfo info = client_info_f(client_id); + ClientRecRef client_rec = + std::make_shared(client_id, info, tick); + resv_heap.push(client_rec); +#if USE_PROP_HEAP + prop_heap.push(client_rec); +#endif + limit_heap.push(client_rec); + ready_heap.push(client_rec); + client_map[client_id] = client_rec; + temp_client = &(*client_rec); // address of obj of shared_ptr + } + + // for convenience, we'll create a reference to the shared pointer + ClientRec& client = *temp_client; + + if (client.idle) { + // We need to do an adjustment so that idle clients compete + // fairly on proportional tags since those tags may have + // drifted from real-time. Either use the lowest existing + // proportion tag -- O(1) -- or the client with the lowest + // previous proportion tag -- O(n) where n = # clients. + // + // So we don't have to maintain a propotional queue that + // keeps the minimum on proportional tag alone (we're + // instead using a ready queue), we'll have to check each + // client. + // + // The alternative would be to maintain a proportional queue + // (define USE_PROP_TAG) and do an O(1) operation here. + + // Was unable to confirm whether equality testing on + // std::numeric_limits::max() is guaranteed, so + // we'll use a compile-time calculated trigger that is one + // third the max, which should be much larger than any + // expected organic value. 
+ constexpr double lowest_prop_tag_trigger = + std::numeric_limits::max() / 3.0; + + double lowest_prop_tag = std::numeric_limits::max(); + for (auto const &c : client_map) { + // don't use ourselves (or anything else that might be + // listed as idle) since we're now in the map + if (!c.second->idle) { + double p; + // use either lowest proportion tag or previous proportion tag + if (c.second->has_request()) { + p = c.second->next_request().tag.proportion + + c.second->prop_delta; + } else { + p = c.second->get_req_tag().proportion + c.second->prop_delta; + } + + if (p < lowest_prop_tag) { + lowest_prop_tag = p; + } + } + } + + // if this conditional does not fire, it + if (lowest_prop_tag < lowest_prop_tag_trigger) { + client.prop_delta = lowest_prop_tag - time; + } + client.idle = false; + } // if this client was idle + +#ifndef DO_NOT_DELAY_TAG_CALC + RequestTag tag(0, 0, 0, time); + + if (!client.has_request()) { + tag = RequestTag(client.get_req_tag(), + get_cli_info(client), + req_params, + time, - cost); ++ cost, ++ anticipation_timeout); + + // copy tag to previous tag for client + client.update_req_tag(tag, tick); + } +#else - RequestTag tag(client.get_req_tag(), get_cli_info(client), req_params, time, cost); ++ RequestTag tag(client.get_req_tag(), ++ get_cli_info(client), ++ req_params, ++ time, ++ cost, ++ anticipation_timeout); ++ + // copy tag to previous tag for client + client.update_req_tag(tag, tick); +#endif + + client.add_request(tag, client.client, std::move(request)); + if (1 == client.requests.size()) { + // NB: can the following 4 calls to adjust be changed + // promote? Can adding a request ever demote a client in the + // heaps? + resv_heap.adjust(client); + limit_heap.adjust(client); + ready_heap.adjust(client); +#if USE_PROP_HEAP + prop_heap.adjust(client); +#endif + } + + client.cur_rho = req_params.rho; + client.cur_delta = req_params.delta; + + resv_heap.adjust(client); + limit_heap.adjust(client); + ready_heap.adjust(client); +#if USE_PROP_HEAP + prop_heap.adjust(client); +#endif + } // add_request + + + // data_mtx should be held when called; top of heap should have + // a ready request + template + void pop_process_request(IndIntruHeap& heap, + std::function process) { + // gain access to data + ClientRec& top = heap.top(); + + RequestRef request = std::move(top.next_request().request); +#ifndef DO_NOT_DELAY_TAG_CALC + RequestTag tag = top.next_request().tag; +#endif + + // pop request and adjust heaps + top.pop_request(); + +#ifndef DO_NOT_DELAY_TAG_CALC + if (top.has_request()) { + ClientReq& next_first = top.next_request(); + next_first.tag = RequestTag(tag, get_cli_info(top), + top.cur_delta, top.cur_rho, - next_first.tag.arrival); ++ next_first.tag.arrival, ++ 0.0, anticipation_timeout); + + // copy tag to previous tag for client + top.update_req_tag(next_first.tag, tick); + } +#endif + + resv_heap.demote(top); + limit_heap.adjust(top); +#if USE_PROP_HEAP + prop_heap.demote(top); +#endif + ready_heap.demote(top); + + // process + process(top.client, request); + } // pop_process_request + + + // data_mtx should be held when called + void reduce_reservation_tags(ClientRec& client) { + for (auto& r : client.requests) { + r.tag.reservation -= client.info.reservation_inv; + +#ifndef DO_NOT_DELAY_TAG_CALC + // reduce only for front tag. 
because next tags' value are invalid + break; +#endif + } + // don't forget to update previous tag + client.prev_tag.reservation -= client.info.reservation_inv; + resv_heap.promote(client); + } + + + // data_mtx should be held when called + void reduce_reservation_tags(const C& client_id) { + auto client_it = client_map.find(client_id); + + // means the client was cleaned from map; should never happen + // as long as cleaning times are long enough + assert(client_map.end() != client_it); + reduce_reservation_tags(*client_it->second); + } + + + // data_mtx should be held when called + NextReq do_next_request(Time now) { - NextReq result{}; - - // if reservation queue is empty, all are empty (i.e., no active clients) ++ // if reservation queue is empty, all are empty (i.e., no ++ // active clients) + if(resv_heap.empty()) { - result.type = NextReqType::none; - return result; ++ return NextReq::none(); + } + + // try constraint (reservation) based scheduling + + auto& reserv = resv_heap.top(); + if (reserv.has_request() && + reserv.next_request().tag.reservation <= now) { - result.type = NextReqType::returning; - result.heap_id = HeapId::reservation; - return result; ++ return NextReq(HeapId::reservation); + } + + // no existing reservations before now, so try weight-based + // scheduling + + // all items that are within limit are eligible based on + // priority + auto limits = &limit_heap.top(); + while (limits->has_request() && + !limits->next_request().tag.ready && + limits->next_request().tag.limit <= now) { + limits->next_request().tag.ready = true; + ready_heap.promote(*limits); + limit_heap.demote(*limits); + + limits = &limit_heap.top(); + } + + auto& readys = ready_heap.top(); + if (readys.has_request() && + readys.next_request().tag.ready && + readys.next_request().tag.proportion < max_tag) { - result.type = NextReqType::returning; - result.heap_id = HeapId::ready; - return result; ++ return NextReq(HeapId::ready); + } + + // if nothing is schedulable by reservation or + // proportion/weight, and if we allow limit break, try to + // schedule something with the lowest proportion tag or + // alternatively lowest reservation tag. 
+ if (allow_limit_break) { + if (readys.has_request() && + readys.next_request().tag.proportion < max_tag) { - result.type = NextReqType::returning; - result.heap_id = HeapId::ready; - return result; ++ return NextReq(HeapId::ready); + } else if (reserv.has_request() && + reserv.next_request().tag.reservation < max_tag) { - result.type = NextReqType::returning; - result.heap_id = HeapId::reservation; - return result; ++ return NextReq(HeapId::reservation); + } + } + + // nothing scheduled; make sure we re-run when next + // reservation item or next limited item comes up + + Time next_call = TimeMax; + if (resv_heap.top().has_request()) { + next_call = + min_not_0_time(next_call, + resv_heap.top().next_request().tag.reservation); + } + if (limit_heap.top().has_request()) { + const auto& next = limit_heap.top().next_request(); + assert(!next.tag.ready || max_tag == next.tag.proportion); + next_call = min_not_0_time(next_call, next.tag.limit); + } + if (next_call < TimeMax) { - result.type = NextReqType::future; - result.when_ready = next_call; - return result; ++ return NextReq(next_call); + } else { - result.type = NextReqType::none; - return result; ++ return NextReq::none(); + } + } // do_next_request + + + // if possible is not zero and less than current then return it; + // otherwise return current; the idea is we're trying to find + // the minimal time but ignoring zero + static inline const Time& min_not_0_time(const Time& current, + const Time& possible) { + return TimeZero == possible ? current : std::min(current, possible); + } + + + /* + * This is being called regularly by RunEvery. Every time it's + * called it notes the time and delta counter (mark point) in a + * deque. It also looks at the deque to find the most recent + * mark point that is older than clean_age. It then walks the + * map and delete all server entries that were last used before + * that mark point. + */ + void do_clean() { + TimePoint now = std::chrono::steady_clock::now(); + DataGuard g(data_mtx); + clean_mark_points.emplace_back(MarkPoint(now, tick)); + + // first erase the super-old client records + + Counter erase_point = 0; + auto point = clean_mark_points.front(); + while (point.first <= now - erase_age) { + erase_point = point.second; + clean_mark_points.pop_front(); + point = clean_mark_points.front(); + } + + Counter idle_point = 0; + for (auto i : clean_mark_points) { + if (i.first <= now - idle_age) { + idle_point = i.second; + } else { + break; + } + } + + if (erase_point > 0 || idle_point > 0) { + for (auto i = client_map.begin(); i != client_map.end(); /* empty */) { + auto i2 = i++; + if (erase_point && i2->second->last_tick <= erase_point) { + delete_from_heaps(i2->second); + client_map.erase(i2); + } else if (idle_point && i2->second->last_tick <= idle_point) { + i2->second->idle = true; + } + } // for + } // if + } // do_clean + + + // data_mtx must be held by caller + template + void delete_from_heap(ClientRecRef& client, + c::IndIntruHeap& heap) { + auto i = heap.rfind(client); + heap.remove(i); + } + + + // data_mtx must be held by caller + void delete_from_heaps(ClientRecRef& client) { + delete_from_heap(client, resv_heap); +#if USE_PROP_HEAP + delete_from_heap(client, prop_heap); +#endif + delete_from_heap(client, limit_heap); + delete_from_heap(client, ready_heap); + } + }; // class PriorityQueueBase + + + template + class PullPriorityQueue : public PriorityQueueBase { + using super = PriorityQueueBase; + + public: + + // When a request is pulled, this is the return type. 
+      struct PullReq {
+	struct Retn {
+	  C                           client;
+	  typename super::RequestRef  request;
+	  PhaseType                   phase;
+	};
+
+	typename super::NextReqType   type;
+	boost::variant<Retn,Time>     data;
+
+	bool is_none() const { return type == super::NextReqType::none; }
+
+	bool is_retn() const { return type == super::NextReqType::returning; }
+	Retn& get_retn() {
+	  return boost::get<Retn>(data);
+	}
+
+	bool is_future() const { return type == super::NextReqType::future; }
+	Time getTime() const { return boost::get
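The excerpt is cut off here, but the PullReq accessors above already show the three outcomes a puller has to handle: a returned request, a future readiness time, or nothing queued. A hypothetical consumer, for illustration only; the pull_request(now) call and the callback names are assumptions rather than code from this excerpt:

using Time = double;

// Hypothetical consumer of a pull-mode queue. Queue is expected to expose a
// pull_request(now) returning something shaped like PullReq above.
template <typename Queue, typename OnRequest, typename OnWait>
void service_one(Queue& queue, Time now,
                 OnRequest&& on_request, OnWait&& on_wait) {
  auto pr = queue.pull_request(now);          // assumed API
  if (pr.is_retn()) {
    auto& retn = pr.get_retn();
    on_request(retn.client, std::move(retn.request), retn.phase);
  } else if (pr.is_future()) {
    on_wait(pr.getTime());                    // try again when the tag matures
  }
  // pr.is_none(): the queue has no requests at all right now
}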