From: J. Eric Ivancich <ivancich@redhat.com>
Date: Fri, 28 Apr 2017 21:13:42 +0000 (-0400)
Subject: Merge commit '0bca9fe991c7e1f623d2a387f54e63f18d3227eb' as 'src/dmclock'
X-Git-Tag: v12.0.3~143^2~1
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=06b037b68b4bf5eb264de00d33c7e253cc8c9ef7;p=ceph-ci.git

Merge commit '0bca9fe991c7e1f623d2a387f54e63f18d3227eb' as 'src/dmclock'
---

06b037b68b4bf5eb264de00d33c7e253cc8c9ef7
diff --cc src/dmclock/.gitignore
index 00000000000,00000000000..c6ddef2752b
new file mode 100644
--- /dev/null
+++ b/src/dmclock/.gitignore
@@@ -1,0 -1,0 +1,4 @@@
++*~
++*.dSYM
++*.o
++build*
diff --cc src/dmclock/CMakeLists.txt
index 00000000000,00000000000..428863dc496
new file mode 100644
--- /dev/null
+++ b/src/dmclock/CMakeLists.txt
@@@ -1,0 -1,0 +1,32 @@@
++cmake_minimum_required(VERSION 2.8.11)
++
++set(CMAKE_CXX_FLAGS "-std=c++11 -Wno-write-strings ${CMAKE_CXX_FLAGS}") # prepended, so flags already present in CMAKE_CXX_FLAGS come later on the command line
++
++set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/modules/") # NOTE(review): CMAKE_SOURCE_DIR is the top-level project directory when this tree is pulled in via add_subdirectory() -- confirm that is intended
++
++if(DO_NOT_DELAY_TAG_CALC) # opt-out switch described in README.md
++  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDO_NOT_DELAY_TAG_CALC")
++endif()
++
++if(K_WAY_HEAP) # optional heap branching factor, passed on the cmake command line by benchmark/data_gen.sh
++  if(K_WAY_HEAP LESS 2)
++    message(FATAL_ERROR "K_WAY_HEAP value should be at least 2")
++  else()
++    set(CMAKE_CXX_SIM_FLAGS "-DK_WAY_HEAP=${K_WAY_HEAP}") # consumed by sim/src/CMakeLists.txt (local_flags)
++  endif()
++endif()
++
++if (NOT(TARGET gtest AND TARGET gtest_main)) # only search for gtest when both targets are not already defined
++  find_package(gtest REQUIRED) # presumably resolved by cmake/modules/Findgtest.cmake via CMAKE_MODULE_PATH
++  include_directories(${GTEST_INCLUDE_DIRS})
++endif()
++
++find_package(Boost REQUIRED)
++include_directories(${Boost_INCLUDE_DIRS})
++
++add_subdirectory(src)
++add_subdirectory(sim)
++add_subdirectory(support)
++
++enable_testing()
++add_subdirectory(test)
diff --cc src/dmclock/README.md
index 00000000000,00000000000..ab67295b153
new file mode 100644
--- /dev/null
+++ b/src/dmclock/README.md
@@@ -1,0 -1,0 +1,45 @@@
++# dmclock
++
++This repository contains C++ 11 code that implements the dmclock
++distributed quality of service algorithm. See __mClock: Handling
++Throughput Variability for Hypervisor IO Scheduling__ by Gulati,
++Merchant, and Varman for a description of the algorithm.
++
++## Running cmake
++
++When running cmake, set the build type with either:
++
++    -DCMAKE_BUILD_TYPE=Debug
++    -DCMAKE_BUILD_TYPE=Release
++
++To turn on profiling, run cmake with an additional:
++
++    -DPROFILE=yes
++
++An optimization/fix to the published algorithm has been added and is
++on by default. To disable this optimization/fix run cmake with:
++
++    -DDO_NOT_DELAY_TAG_CALC=yes
++
++## Running make
++
++### Building the dmclock library
++
++The `make` command builds a library libdmclock.a. That plus the header
++files in the src directory allow one to use the implementation in
++their code.
++
++### Building unit tests
++
++The `make dmclock-tests` command builds unit tests.
++
++### Building simulations
++
++The `make dmclock-sims` command builds two simulations -- *dmc_sim*
++and *ssched_sim* -- which incorporate, respectively, the dmclock
++priority queue or a very simple scheduler for comparison. Other
++priority queue implementations could be added in the future.
++
++## dmclock API
++
++To be written....
diff --cc src/dmclock/benchmark/README.md
index 00000000000,00000000000..d945e986fc1
new file mode 100644
--- /dev/null
+++ b/src/dmclock/benchmark/README.md
@@@ -1,0 -1,0 +1,42 @@@
++# dmclock benchmarking
++
++**IMPORTANT**: now that K_WAY_HEAP is no longer allowed to have the
++value 1, the shell and Python scripts that generate the PDFs no longer
++work exactly correctly. Some effort to debug is necessary.
++
++This directory contains scripts to evaluate effects of different
++branching-factors (k=1 to k=11) in the IndirectIntrusiveHeap
++data-structure. IndirectIntrusiveHeap is now a k-way heap, so finding
++an ideal value for k (i.e., k=2 or k=3) for a particular work-load is
++important. Also, it is well-documented that the right choice of
++k-value improves the caching behaviour [Syed -- citation needed
++here]. As a result, the overall performance of an application using
++k-way heap increases significantly [Syed -- citation needed here].
++
++A rule of thumb is the following:
++	if the number of elements is <= 6, use k=1 (no longer accepted by the build; see the note above)
++	otherwise, use k=3.
++
++## Prerequisites
++
++Requires Python 2.7, gnuplot, and awk.
++  
++## Running benchmark
++
++./run.sh [name_of_the_output] [k_way] [repeat] # [Syed -- last two command line args do not work]
++
++The "run.sh" script looks for config files in the "configs" directory,
++and the final output is generated as
++"name_of_the_output.pdf". Internally, "run.sh" calls other scripts
++such as data_gen.sh, data_parser.py, and plot_gen.sh.
++
++## Modifying parameters
++
++To modify the k-value and/or the number of times each simulation is
++repeated, modify the following two variables in the "run.sh" file:
++
++    k_way=[your_value]
++    repeat=[your_value]
++
++For example, k_way=3 means the benchmark will compare simulations
++using 2-way and 3-way heaps (data_gen.sh starts at k=2).
diff --cc src/dmclock/benchmark/configs/dmc_sim_100_100.conf
index 00000000000,00000000000..c93d4c71f6d
new file mode 100644
--- /dev/null
+++ b/src/dmclock/benchmark/configs/dmc_sim_100_100.conf
@@@ -1,0 -1,0 +1,31 @@@
++[global]
++server_groups = 1
++client_groups = 2
++server_random_selection = true
++server_soft_limit = true
++
++[server.0]
++server_count = 100
++server_iops  = 160
++
++[client.0]
++client_count = 99
++client_wait = 0
++client_total_ops = 10000
++client_server_select_range = 100
++client_iops_goal = 200
++client_outstanding_ops = 32
++client_reservation = 100.0
++client_limit = 0.0
++client_weight = 1.0
++
++[client.1]
++client_count = 1
++client_wait = 10
++client_total_ops = 10000
++client_server_select_range = 100
++client_iops_goal = 200
++client_outstanding_ops = 32
++client_reservation = 100.0
++client_limit = 0.0
++client_weight = 1.0
diff --cc src/dmclock/benchmark/configs/dmc_sim_8_6.conf
index 00000000000,00000000000..28aeb401d44
new file mode 100644
--- /dev/null
+++ b/src/dmclock/benchmark/configs/dmc_sim_8_6.conf
@@@ -1,0 -1,0 +1,43 @@@
++[global]
++server_groups = 1
++client_groups = 3
++server_random_selection = true
++server_soft_limit = true
++
++[client.0]
++client_count = 2
++client_wait = 0
++client_total_ops = 1000
++client_server_select_range = 8
++client_iops_goal = 200
++client_outstanding_ops = 32
++client_reservation = 0.0
++client_limit = 0.0
++client_weight = 1.0
++
++[client.1]
++client_count = 2
++client_wait = 5
++client_total_ops = 1000
++client_server_select_range = 8
++client_iops_goal = 200
++client_outstanding_ops = 32
++client_reservation = 20.0
++client_limit = 40.0
++client_weight = 1.0
++
++[client.2]
++client_count = 2
++client_wait = 10
++client_total_ops = 1000
++client_server_select_range = 8
++client_iops_goal = 200
++client_outstanding_ops = 32
++client_reservation = 0.0
++client_limit = 50.0
++client_weight = 2.0
++
++
++[server.0]
++server_count = 8
++server_iops  = 160
diff --cc src/dmclock/benchmark/data_gen.sh
index 00000000000,00000000000..80a77bd9a1a
new file mode 100755
--- /dev/null
+++ b/src/dmclock/benchmark/data_gen.sh
@@@ -1,0 -1,0 +1,73 @@@
++#!/bin/bash
++config_dir="configs"
++repeat=2 #5
++
++# parameter check -- output_file name
++if [ "$1" != "" ]; then
++  output_file="$1"
++else
++  echo "Please provide the name of the output file"
++  exit
++fi
++
++# parameter check -- k-value
++if [ "$2" != "" ]; then
++  k_way="$2"
++else
++  echo "Please provide the maximum K_WAY value"
++  exit
++fi
++
++# parameter check --repeat
++if [ "$3" != "" ]; then
++  repeat="$3"
++fi
++
++echo "k-way:$k_way, num_repeat:$repeat"
++
++# create simulators in different directories 
++k=2
++while [ $k -le $k_way ]
++do
++  mkdir "build_$k"
++  cd "build_$k"
++  rm -rf *
++  cmake -DCMAKE_BUILD_TYPE=Release -DK_WAY_HEAP=$k ../../.
++  make dmclock-sims
++  cd ..
++  
++  k=$(( $k + 1 ))
++done
++
++# run simulators 
++echo '' > $output_file
++for config in "$config_dir"/*.conf
++do
++  k=2
++  while [ $k -le $k_way ]
++  do
++    cd "build_$k"
++    
++    # repeat same experiment
++    i=0
++    while [ $i -lt $repeat ]
++    do  
++      i=$(( $i + 1 ))
++      
++      # clear cache first
++      sync
++      #sudo sh -c 'echo 1 >/proc/sys/vm/drop_caches'
++      #sudo sh -c 'echo 2 >/proc/sys/vm/drop_caches'
++      #sudo sh -c 'echo 3 >/proc/sys/vm/drop_caches'
++
++      # run with heap
++      msg="file_name:$k:$config"
++      echo $msg >> ../$output_file
++      echo "running $msg ..."
++      ./sim/dmc_sim -c ../$config | awk '(/average/)' >> ../$output_file
++    done # end repeat
++    cd ..
++    k=$(( $k + 1 ))
++  done # end k_way
++done # end config
++
diff --cc src/dmclock/benchmark/data_parser.py
index 00000000000,00000000000..c90d85fd9ab
new file mode 100755
--- /dev/null
+++ b/src/dmclock/benchmark/data_parser.py
@@@ -1,0 -1,0 +1,191 @@@
++#!/usr/bin/env python
++
++class DataPoint:  # one measurement record: config identity, heap arity and two timing values
++  def __init__(self):                
++    self.nserver = 0;  # sum of the server_count values of the config file (see parse_config_params)
++    self.nclient = 0;  # sum of the client_count values of the config file
++    self.heap_type = 0;  # k of the k-way heap, taken from the "file_name:<k>:<config>" marker line
++    self.total_time_to_add_req = 0;  # per-run value, or the mean over runs after aggregation
++    self.total_time_to_complete_req = 0;  # per-run value, or the mean over runs after aggregation
++    self.config = ''  # config file path exactly as written in the results file
++
++  def set_name(self, config, heap_type):  # set the (config, heap_type) pair that identifies this point
++    self.config = config;
++    self.heap_type = heap_type
++
++  def get_conig(self):  # NOTE(review): name looks like a typo for "get_config"; no caller is visible in this file
++    import re
++    return re.split(r"/|\.", self.config)[1]  # 2nd piece after splitting on '/' and '.', e.g. "configs/dmc_sim_8_6.conf" -> "dmc_sim_8_6"
++
++  def __str__(self):  # short human-readable summary
++    return "s:%d, c:%d,h:%d,config:%s"%(self.nserver, self.nclient, self.heap_type, self.config);
++# end DataPoint
++
++
++def isFloat(elem):  # True when float(elem) succeeds, False when it raises ValueError
++ try:
++  float(elem)
++  return True
++ except ValueError:  # other exception types (e.g. TypeError) are not caught
++  return False
++#end isFloat
++
++
++def parse_config_params(fname):  # returns [nserver, nclient], each summed over every matching line of config file `fname`
++  nclient = 0;
++  nserver = 0;
++  # read config file property 
++  with open(fname, 'r') as f:
++    for line in f:
++      line = line.strip('\n \t')
++      if not line: continue;
++      if line.startswith("client_count"):
++        nclient += int(line.split('=')[-1]);  # text after the last '='; int() ignores the surrounding blanks
++      if line.startswith("server_count"): 
++        nserver += int(line.split('=')[-1]);
++  # end of file
++  return [nserver, nclient];
++# parse_config_params
++
++def make_aggregate_data_point(dps, config, heap_type): 
++    # create new aggregate point
++    dp = DataPoint();
++    # set set and k_way_heap property
++    dp.set_name(config, heap_type); 
++    
++    num_run = 0
++    for _dp in dps:
++      if _dp.config == config and _dp.heap_type == heap_type:
++        # print _dp, config, heap_type
++        dp.nserver =_dp.nserver
++        dp.nclient = _dp.nclient
++        num_run                       += 1
++        dp.total_time_to_add_req      += _dp.total_time_to_add_req
++        dp.total_time_to_complete_req += _dp.total_time_to_complete_req 
++        
++    # average
++    dp.total_time_to_add_req      /= num_run;
++    dp.total_time_to_complete_req /= num_run
++    #print dp
++    return dp;
++
++def parse_data_points(filename):  # parse a results file; returns a list (one entry per config) of lists (one aggregated DataPoint per k)
++  dps = []; #data-points
++  dp = None;
++  state = 0;  # number of "average" lines seen so far for the current run
++  configs = {}  # used as a set of config file names
++  k_ways  = {}  # used as a set of heap arities
++  
++  with open(filename, 'r') as f:
++    for line in f:
++      line = line.strip('\n \t')
++      if not line: continue;
++      
++      # file_name:1:configs/dmc_sim_8_6.conf
++      if line.startswith("file_name"):      
++        if dp:  # a marker line closes the previous run, if there was one
++          dps.append(dp);
++          state = 0;
++         
++        # new data-point 
++        dp = DataPoint();
++        parts = line.split(':')
++        fname = parts[-1];        
++        dp.heap_type = int(parts[1]);
++        if dp.heap_type not in k_ways:
++          k_ways[dp.heap_type] = 1;
++        
++        # add to the dictionary
++        configs[fname] = 1;
++        
++        dp.config = fname;
++        params = parse_config_params(fname)  # fname is opened as given, i.e. relative to the current directory
++        dp.nserver = params[0];
++        dp.nclient = params[-1];
++         
++      elif line.startswith("average"):	# only the 3rd and 4th "average" lines of a run are used
++        r = [float(s) for s in line.split(' ') if isFloat(s)]
++        state +=1;
++        #print r, dp #if isFloat(s)
++        if state == 3:
++          dp.total_time_to_add_req = r[0]  # first numeric token on the line
++        elif state == 4:
++          dp.total_time_to_complete_req = r[0]
++        else: pass
++
++      else: 
++        pass;    
++  # final entry
++  dps.append(dp)  # NOTE(review): appends None when the file had no "file_name" line; configs is empty then, so dps is not read below
++  
++  # compute average of multiple runs
++  dps_avg = []
++  for config in configs:
++    data_per_config = []
++    for k in k_ways:  # NOTE(review): dict iteration order decides the column order -- confirm it matches create_header
++      aggr_dp = make_aggregate_data_point(dps, config , k);
++      data_per_config.append(aggr_dp);
++    dps_avg.append(data_per_config);
++  # end for
++  return dps_avg;
++# end parse_data_points
++
++
++def create_header(num_cols):  # header row: label column, then num_cols add_req_* names, then num_cols complete_req_* names
++  fields = ['nserver_nclient(config_file)','add_req', 'complete_req'];
++  header = fields[0]
++  # write add_req_{2, 3, ...}: the suffix starts at 2 (i+2)
++  for i in range(num_cols):
++    header = '%s %s_%i'%(header, fields[1], i+2)
++  # write complete_req_{2, 3, ...}
++  for i in range(num_cols):
++    header = '%s %s_%i'%(header, fields[2], i+2)
++  # new-line
++  header = '%s\n'%(header)
++  return header
++# end create_header
++
++
++def create_data_line(aggr_dp):  # one data row for a config; aggr_dp is the list of aggregated DataPoints of that config (one per k)
++  # get common info
++  dp = aggr_dp[0]  # nserver/nclient are read from the first entry only
++  data_line = "s:%d_c:%d "%(dp.nserver, dp.nclient);
++  # get the point-count
++  num_cols = len(aggr_dp);
++  # write one add_req value per entry of aggr_dp, in list order
++  for i in range(num_cols):
++    data_line = '%s %f'%(data_line, aggr_dp[i].total_time_to_add_req)
++  # write one complete_req value per entry of aggr_dp, in list order
++  for i in range(num_cols):
++    data_line = '%s %f'%(data_line, aggr_dp[i].total_time_to_complete_req)
++  # new-line
++  data_line = '%s\n'%(data_line)
++  return data_line
++# end create_data_line
++
++    
++def make_data(filename):
++  # write the aggregated point in space separated file  
++  dps = parse_data_points(filename);
++  if not len(dps) : return
++  print "total points: ", len(dps)
++  # open file
++  with open('%s.dat'%(filename), 'w+') as f:
++    # write header
++    f.write(create_header(len(dps[0])));
++    # write data-line
++    for aggr_dp in dps:
++    	f.write(create_data_line(aggr_dp));
++
++
++def main(output_file):
++  print output_file
++  make_data(output_file);
++
++import sys
++if __name__ == "__main__":
++  file_name="result"  # default results file when no argument is given
++  if len(sys.argv) > 1:
++    file_name=sys.argv[1].strip()  # first command line argument overrides the default
++  main(file_name)
++
diff --cc src/dmclock/benchmark/plot_gen.sh
index 00000000000,00000000000..d90bde1921a
new file mode 100755
--- /dev/null
+++ b/src/dmclock/benchmark/plot_gen.sh
@@@ -1,0 -1,0 +1,60 @@@
++#!/bin/bash
++
++if [ "$1" != "" ]; then
++  output_file="$1"
++else
++  echo "Please provide the name of the output file"
++  exit
++fi
++
++# parameter check -- k-value
++if [ "$2" != "" ]; then
++  k_way="$2"
++else
++  echo "Please provide the maximum K_WAY value"
++  exit
++fi
++#echo "k-way: $k_way"
++#exit
++
++gnuplot << EOF
++
++# Note you need gnuplot 4.4 for the pdfcairo terminal.
++clear
++reset
++
++set terminal pdfcairo size 7in,5in font "Gill Sans,5" linewidth 1 rounded fontscale .8 noenhanced
++set output "${output_file}.pdf"
++
++# starts multiplot
++set multiplot layout 2,1
++
++# Line style for axes
++set style line 80 lt rgb "#808080"
++
++# Line style for grid
++set style line 81 lt 0  # dashed
++set style line 81 lt rgb "#808080"  # grey
++
++set grid back linestyle 81
++set border 3 back linestyle 80 
++
++#set xtics rotate out
++set style data histogram
++set style histogram clustered
++
++set style fill solid border
++set xlabel 'Heap Timing for different K values'   
++set ylabel 'Time (nanosec)'        
++set key top right
++
++set yrange [0:*]
++
++# plot 1
++set title 'Request Addition Time'
++plot for [COL=2:($k_way + 1)] '${output_file}.dat' using COL:xticlabels(1) title columnheader
++
++# plot 2
++set title 'Request Completion Time'
++plot for [COL=($k_way + 2):(2 * $k_way + 1)] '${output_file}.dat' using COL:xticlabels(1) title columnheader
++EOF
diff --cc src/dmclock/benchmark/run.sh
index 00000000000,00000000000..11432b53008
new file mode 100755
--- /dev/null
+++ b/src/dmclock/benchmark/run.sh
@@@ -1,0 -1,0 +1,24 @@@
++#!/bin/bash
++
++# default value
++k_way=3 #11
++repeat=2 #5
++
++output_file="" 
++if [ "$1" != "" ]; then
++  output_file="$1"
++else
++  echo "Please provide the name of the output file"
++  exit
++fi
++
++echo "generating file ${output_file}"
++sh data_gen.sh ${output_file} ${k_way} ${repeat}
++
++echo "converting ${output_file} to ${output_file}.dat"
++python data_parser.py ${output_file}
++
++echo "now generating bar-chart"
++#gnuplot -e 'output_file=value'  plot_gen.gnuplot 
++sh plot_gen.sh  ${output_file} ${k_way}
++echo "done! check ${output_file}.pdf"
diff --cc src/dmclock/cmake/modules/Findboost.cmake
index 00000000000,00000000000..4f0dfd052f0
new file mode 100644
--- /dev/null
+++ b/src/dmclock/cmake/modules/Findboost.cmake
@@@ -1,0 -1,0 +1,15 @@@
++# - Find boost (header-only lookup: sets BOOST_INCLUDE_DIR, BOOST_FOUND and BOOST_INCLUDES)
++
++find_path(BOOST_INCLUDE_DIR NAMES boost/variant.hpp
++    PATHS /usr/include /usr/local/include ${BOOST_DIR}/include) # BOOST_DIR is an optional hint; it is not set in this module
++
++include(FindPackageHandleStandardArgs)
++FIND_PACKAGE_HANDLE_STANDARD_ARGS(boost
++  REQUIRED_VARS BOOST_INCLUDE_DIR)
++
++if(boost_FOUND) # mirror the lower-case result variable into BOOST_FOUND
++  set(BOOST_FOUND 1)
++endif()
++if(BOOST_FOUND)
++  set(BOOST_INCLUDES ${BOOST_INCLUDE_DIR})
++endif()
diff --cc src/dmclock/cmake/modules/Findgtest.cmake
index 00000000000,00000000000..bfe0980e4ed
new file mode 100644
--- /dev/null
+++ b/src/dmclock/cmake/modules/Findgtest.cmake
@@@ -1,0 -1,0 +1,48 @@@
++# - Find gtest
++#
++#  GTEST_INCLUDE_DIRS   - where to find gtest/gtest.h
++#  GTEST_LIBRARY, GTEST_MAIN_LIBRARY - the gtest and gtest_main libraries (GTEST_LIBRARIES is not set by this module)
++#  GTEST_FOUND          - True if gtest found.
++#
++#  GMOCK_INCLUDE_DIRS   - where to find gmock/gmock.h
++#  GMOCK_LIBRARY, GMOCK_MAIN_LIBRARY - the gmock and gmock_main libraries (GMOCK_LIBRARIES is not set by this module)
++#  GMOCK_FOUND          - True if gmock found.
++
++
++## GTEST
++
++find_path(GTEST_INCLUDE_DIRS NAMES gtest/gtest.h
++    PATHS /usr/include /usr/local/include)
++
++find_library(GTEST_LIBRARY gtest
++  PATHS /usr/local/lib /usr/lib64)
++
++find_library(GTEST_MAIN_LIBRARY gtest_main
++  PATHS /usr/local/lib /usr/lib64)
++
++include(FindPackageHandleStandardArgs)
++FIND_PACKAGE_HANDLE_STANDARD_ARGS(gtest
++  REQUIRED_VARS GTEST_LIBRARY GTEST_MAIN_LIBRARY GTEST_INCLUDE_DIRS)
++
++if(gtest_FOUND) # mirror the lower-case result variable into GTEST_FOUND
++  set(GTEST_FOUND 1)
++endif()
++
++## GMOCK
++
++find_path(GMOCK_INCLUDE_DIRS NAMES gmock/gmock.h
++    PATHS /usr/include /usr/local/include)
++
++find_library(GMOCK_LIBRARY gmock
++  PATHS /usr/local/lib /usr/lib64)
++
++find_library(GMOCK_MAIN_LIBRARY gmock_main
++  PATHS /usr/local/lib /usr/lib64)
++
++include(FindPackageHandleStandardArgs)
++FIND_PACKAGE_HANDLE_STANDARD_ARGS(gmock
++  REQUIRED_VARS GMOCK_LIBRARY GMOCK_MAIN_LIBRARY GMOCK_INCLUDE_DIRS) # NOTE(review): with find_package(gtest REQUIRED) a missing gmock presumably fails the configure step too -- confirm
++
++if(gmock_FOUND) # mirror the lower-case result variable into GMOCK_FOUND
++  set(GMOCK_FOUND 1)
++endif()
diff --cc src/dmclock/dmclock-config.cmake.in
index 00000000000,00000000000..01636532c1d
new file mode 100644
--- /dev/null
+++ b/src/dmclock/dmclock-config.cmake.in
@@@ -1,0 -1,0 +1,17 @@@
++# - Config file for the dmclock package
++# It defines the following variables
++#  DMCLOCK_INCLUDE_DIRS - include directories for dmclock
++#  DMCLOCK_LIBRARIES    - libraries to link against
++ 
++# Compute paths
++get_filename_component(DMCLOCK_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
++set(DMCLOCK_INCLUDE_DIRS "${DMCLOCK_CMAKE_DIR}/src")
++# set(DMCLOCK_INCLUDE_DIRS "@CONF_INCLUDE_DIRS@")
++ 
++# Our library dependencies; skipped when the dmclock target already exists or dmclock is being built in this tree
++if(NOT TARGET dmclock AND NOT dmclock_BINARY_DIR)
++  include("${DMCLOCK_CMAKE_DIR}/dmclock-targets.cmake")
++endif()
++ 
++# NOTE(review): "dmclock" is expected to be a target made available by dmclock-targets.cmake -- confirm
++set(DMCLOCK_LIBRARIES dmclock)
diff --cc src/dmclock/dmclock-targets.cmake
index 00000000000,00000000000..2c84f34a142
new file mode 100644
--- /dev/null
+++ b/src/dmclock/dmclock-targets.cmake
@@@ -1,0 -1,0 +1,1 @@@
++export(PACKAGE dmclock) # per the CMake docs, export(PACKAGE) records the current build directory in the user package registry
diff --cc src/dmclock/sim/CMakeLists.txt
index 00000000000,00000000000..febd4f0ab6f
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/CMakeLists.txt
@@@ -1,0 -1,0 +1,1 @@@
++add_subdirectory(src) # the simulator targets are defined in sim/src/CMakeLists.txt
diff --cc src/dmclock/sim/dmc_sim_100th.conf
index 00000000000,00000000000..17d0043548e
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/dmc_sim_100th.conf
@@@ -1,0 -1,0 +1,32 @@@
++[global]
++server_groups = 1
++client_groups = 2
++server_random_selection = true
++server_soft_limit = true
++
++[client.0]
++client_count = 99
++client_wait = 0
++client_total_ops = 1000
++client_server_select_range = 10
++client_iops_goal = 50
++client_outstanding_ops = 100
++client_reservation = 20.0
++client_limit = 60.0
++client_weight = 1.0
++
++[client.1]
++client_count = 1
++client_wait = 10
++client_total_ops = 1000
++client_server_select_range = 10
++client_iops_goal = 50
++client_outstanding_ops = 100
++client_reservation = 20.0
++client_limit = 60.0
++client_weight = 1.0
++
++[server.0]
++server_count = 100
++server_iops = 40
++server_threads = 1
diff --cc src/dmclock/sim/dmc_sim_example.conf
index 00000000000,00000000000..989f2f08281
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/dmc_sim_example.conf
@@@ -1,0 -1,0 +1,43 @@@
++[global]
++server_groups = 1
++client_groups = 3
++server_random_selection = false
++server_soft_limit = false
++
++[client.0]
++client_count = 1
++client_wait = 0
++client_total_ops = 2000
++client_server_select_range = 1
++client_iops_goal = 200
++client_outstanding_ops = 32
++client_reservation = 0.0
++client_limit = 0.0
++client_weight = 1.0
++
++[client.1]
++client_count = 1
++client_wait = 5
++client_total_ops = 2000
++client_server_select_range = 1
++client_iops_goal = 200
++client_outstanding_ops = 32
++client_reservation = 0.0
++client_limit = 40.0
++client_weight = 1.0
++
++[client.2]
++client_count = 1
++client_wait = 10
++client_total_ops = 2000
++client_server_select_range = 1
++client_iops_goal = 200
++client_outstanding_ops = 32
++client_reservation = 0.0
++client_limit = 50.0
++client_weight = 2.0
++
++[server.0]
++server_count = 1
++server_iops = 160
++server_threads = 1
diff --cc src/dmclock/sim/src/CMakeLists.txt
index 00000000000,00000000000..426827b03f2
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/src/CMakeLists.txt
@@@ -1,0 -1,0 +1,42 @@@
++include_directories(ssched) # ssched code
++include_directories(../../src) # dmclock code
++include_directories(../../support/src)
++include_directories(${BOOST_INCLUDE_DIR}) # NOTE(review): the top-level CMakeLists.txt uses Boost_INCLUDE_DIRS; BOOST_INCLUDE_DIR is only set by cmake/modules/Findboost.cmake -- confirm it is defined here
++
++set(local_flags "-Wall -pthread ${CMAKE_CXX_SIM_FLAGS}") # CMAKE_CXX_SIM_FLAGS carries -DK_WAY_HEAP=<k> when K_WAY_HEAP is set at the top level
++
++set(ssched_sim_srcs test_ssched.cc test_ssched_main.cc)
++set(dmc_sim_srcs test_dmclock.cc test_dmclock_main.cc)
++set(config_srcs config.cc str_list.cc ConfUtils.cc)
++
++set_source_files_properties(${ssched_sim_srcs} ${dmc_sim_srcs} ${dmc_srcs} ${config_srcs} # NOTE(review): dmc_srcs is not set anywhere in this file
++  PROPERTIES
++  COMPILE_FLAGS "${local_flags}"
++  )
++
++if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
++  set(warnings_off " -Wno-unused-variable -Wno-unused-function")
++elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
++  set(warnings_off " -Wno-unused-but-set-variable -Wno-unused-function")
++endif()
++
++# append warning flags to certain source files (the leading blank in warnings_off separates them from local_flags)
++set_property(
++  SOURCE ${ssched_sim_srcs} ${dmc_sim_srcs} ${config_srcs}
++  APPEND_STRING
++  PROPERTY COMPILE_FLAGS "${warnings_off}"
++  )
++
++add_executable(ssched_sim EXCLUDE_FROM_ALL ${ssched_sim_srcs}) # not part of the default build; built through dmclock-sims below
++add_executable(dmc_sim EXCLUDE_FROM_ALL ${dmc_sim_srcs} ${config_srcs})
++
++set_target_properties(ssched_sim dmc_sim
++  PROPERTIES
++  RUNTIME_OUTPUT_DIRECTORY ..) # benchmark/data_gen.sh runs the binary as ./sim/dmc_sim from the build directory
++
++add_dependencies(dmc_sim dmclock)
++
++target_link_libraries(ssched_sim LINK_PRIVATE pthread)
++target_link_libraries(dmc_sim LINK_PRIVATE pthread $<TARGET_FILE:dmclock>)
++
++add_custom_target(dmclock-sims DEPENDS ssched_sim dmc_sim) # the "make dmclock-sims" entry point named in README.md
diff --cc src/dmclock/sim/src/ConfUtils.cc
index 00000000000,00000000000..74ddb06ee29
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/src/ConfUtils.cc
@@@ -1,0 -1,0 +1,574 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++/*
++ * Ceph - scalable distributed file system
++ *
++ * Copyright (C) 2011 New Dream Network
++ *
++ * This is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License version 2.1, as published by the Free Software
++ * Foundation.  See file COPYING.
++ *
++ */
++
++#include <algorithm>
++#include <errno.h>
++#include <list>
++#include <map>
++#include <sstream>
++#include <stdio.h>
++#include <stdlib.h>
++#include <string.h>
++#include <string>
++#include <sys/stat.h>
++#include <sys/types.h>
++#include <unistd.h>
++#include <iostream>
++
++#include <assert.h>
++#include "ConfUtils.h"
++
++using std::cerr;
++using std::ostringstream;
++using std::pair;
++using std::string;
++
++#define MAX_CONFIG_FILE_SZ 0x40000000
++
++////////////////////////////// ConfLine //////////////////////////////
++ConfLine::
++ConfLine(const std::string &key_, const std::string val_,
++      const std::string newsection_, const std::string comment_, int line_no_)
++  : key(key_), val(val_), newsection(newsection_) // comment_ and line_no_ are accepted but not stored
++{
++  // If you want to implement writable ConfFile support, you'll need to save
++  // the comment and line_no arguments here.
++}
++
++bool ConfLine::
++operator<(const ConfLine &rhs) const
++{
++  // Ordering looks at the key alone, so two lines with the same key are
++  // equivalent for ordered containers.  Within a section the last line
++  // with a given key wins.
++  return key < rhs.key;
++}
++
++std::ostream &operator<<(std::ostream& oss, const ConfLine &l)
++{
++  // Debug representation of a parsed line: key, value and section name.
++  oss << "ConfLine(key = '" << l.key;
++  oss << "', val='" << l.val;
++  oss << "', newsection='" << l.newsection << "')";
++  return oss;
++}
++///////////////////////// ConfFile //////////////////////////
++ConfFile::
++ConfFile() // no explicit initialisation is performed here
++{
++}
++
++ConfFile::
++~ConfFile() // nothing to release explicitly
++{
++}
++
++void ConfFile::
++clear() // forget everything parsed so far; parse_file() calls this first
++{
++  sections.clear();
++}
++
++/* We load the whole file into memory and then parse it.  Although this is not
++ * the optimal approach, it does mean that most of this code can be shared with
++ * the bufferlist loading function. Since bufferlists are always in-memory, the
++ * load_from_buffer interface works well for them.
++ * In general, configuration files should be a few kilobytes at maximum, so
++ * loading the whole configuration into memory shouldn't be a problem.
++ *
++ * Any previously parsed content is discarded first.
++ *
++ * Returns 0 once the file has been read, or a negative errno value when it
++ * could not be opened, stat'ed, sized or read.  Except for fopen() and
++ * malloc() failures, a description of the failure is appended to *errors.
++ * Problems found while parsing are reported through *errors only; they do
++ * not change the return value.
++ */
++int ConfFile::
++parse_file(const std::string &fname, std::deque<std::string> *errors,
++	   std::ostream *warnings)
++{
++  clear();
++
++  int ret = 0;
++  size_t sz;
++  char *buf = NULL;
++  char buf2[128];
++  FILE *fp = fopen(fname.c_str(), "r");
++  if (!fp) {
++    ret = -errno;
++    return ret;
++  }
++
++  struct stat st_buf;
++  if (fstat(fileno(fp), &st_buf)) {
++    ret = -errno;
++    ostringstream oss;
++    // ret holds the negated errno; strerror_r() needs the positive value
++    oss << "read_conf: failed to fstat '" << fname << "': " << strerror_r(-ret, buf2, sizeof(buf2));
++    errors->push_back(oss.str());
++    goto done;
++  }
++
++  if (st_buf.st_size > MAX_CONFIG_FILE_SZ) {
++    ostringstream oss;
++    oss << "read_conf: config file '" << fname << "' is " << st_buf.st_size
++	<< " bytes, but the maximum is " << MAX_CONFIG_FILE_SZ;
++    errors->push_back(oss.str());
++    ret = -EINVAL;
++    goto done;
++  }
++
++  sz = (size_t)st_buf.st_size;
++  // malloc(0) is allowed to return NULL; always request at least one byte so
++  // that an empty file is not reported as ENOMEM
++  buf = (char*)malloc(sz ? sz : 1);
++  if (!buf) {
++    ret = -ENOMEM;
++    goto done;
++  }
++
++  if (fread(buf, 1, sz, fp) != sz) {
++    if (ferror(fp)) {
++      ret = -errno;
++      ostringstream oss;
++      oss << "read_conf: fread error while reading '" << fname << "': "
++	  << strerror_r(-ret, buf2, sizeof(buf2));
++      errors->push_back(oss.str());
++      goto done;
++    }
++    else {
++      ostringstream oss;
++      oss << "read_conf: unexpected EOF while reading '" << fname << "': "
++	  << "possible concurrent modification?";
++      errors->push_back(oss.str());
++      ret = -EIO;
++      goto done;
++    }
++  }
++
++  load_from_buffer(buf, sz, errors, warnings);
++  ret = 0;
++
++done:
++  // single exit path once the file is open: free(NULL) is a no-op
++  free(buf);
++  fclose(fp);
++  return ret;
++}
++
++int ConfFile::
++read(const std::string &section, const std::string &key, std::string &val) const
++{
++  // Look up `key` (after key-name normalisation) in `section`.  On success
++  // the value is copied into `val` and 0 is returned; a missing section or
++  // key yields -ENOENT and leaves `val` untouched.
++  const const_section_iter_t sec_it = sections.find(section);
++  if (sec_it == sections.end())
++    return -ENOENT;
++
++  // Lines are compared by key only, so a probe with empty fields suffices.
++  const ConfLine probe(normalize_key_name(key), "", "", "", 0);
++  const ConfSection::const_line_iter_t line_it = sec_it->second.lines.find(probe);
++  if (line_it == sec_it->second.lines.end())
++    return -ENOENT;
++
++  val = line_it->val;
++  return 0;
++}
++
++ConfFile::const_section_iter_t ConfFile::
++sections_begin() const // read-only iterator to the first parsed section
++{
++  return sections.begin();
++}
++
++ConfFile::const_section_iter_t ConfFile::
++sections_end() const // past-the-end iterator matching sections_begin()
++{
++  return sections.end();
++}
++
++/* Strip leading and trailing whitespace from str, in place.
++ *
++ * When strip_internal is true, every run of whitespace inside the string is
++ * additionally collapsed to its first character.  As before, the text is
++ * treated as a C string: anything from the first embedded NUL on is dropped.
++ *
++ * No variable-length arrays are used (they are not standard C++ and put an
++ * unbounded amount of data on the stack), and characters are passed to
++ * isspace() as unsigned char because negative values are undefined there.
++ */
++void ConfFile::
++trim_whitespace(std::string &str, bool strip_internal)
++{
++  const char *first = str.c_str();
++  const char *last = first + strlen(first);
++
++  // strip preceding
++  while (first != last && isspace(static_cast<unsigned char>(*first)))
++    ++first;
++  // strip trailing
++  while (last != first && isspace(static_cast<unsigned char>(*(last - 1))))
++    --last;
++
++  std::string trimmed;
++  trimmed.reserve(last - first);
++  if (!strip_internal) {
++    trimmed.assign(first, last);
++  }
++  else {
++    // strip internal: keep only the first character of each whitespace run
++    bool prev_was_space = false;
++    for (const char *p = first; p != last; ++p) {
++      const bool is_space = isspace(static_cast<unsigned char>(*p)) != 0;
++      if (!(is_space && prev_was_space))
++	trimmed.push_back(*p);
++      prev_was_space = is_space;
++    }
++  }
++  // first/last point into str, so str is only replaced once they are done
++  str.swap(trimmed);
++}
++
++/* Normalize a key name.
++ *
++ * Normalized key names have no leading or trailing whitespace, and all
++ * whitespace is stored as underscores.  The main reason for selecting this
++ * normal form is so that in common/config.cc, we can use a macro to stringify
++ * the field names of md_config_t and get a key in normal form.
++ *
++ * NOTE(review): only ' ' is replaced below.  trim_whitespace() collapses a
++ * run of whitespace to its first character, so a run that starts with e.g.
++ * a tab appears to stay a tab -- confirm whether that is intended.
++ */
++std::string ConfFile::
++normalize_key_name(const std::string &key)
++{
++  string k(key); // work on a copy; the argument is not modified
++  ConfFile::trim_whitespace(k, true);
++  std::replace(k.begin(), k.end(), ' ', '_');
++  return k;
++}
++
++std::ostream &operator<<(std::ostream &oss, const ConfFile &cf)
++{
++  // Dump every section as "[name]" followed by one tab-indented
++  // key = "value" line per entry; entries with an empty key are skipped.
++  ConfFile::const_section_iter_t sec = cf.sections_begin();
++  const ConfFile::const_section_iter_t sec_end = cf.sections_end();
++  for (; sec != sec_end; ++sec) {
++    oss << "[" << sec->first << "]\n";
++    const ConfSection &body = sec->second;
++    for (ConfSection::const_line_iter_t ln = body.lines.begin();
++	 ln != body.lines.end(); ++ln) {
++      if (ln->key.empty())
++	continue;
++      oss << "\t" << ln->key << " = \"" << ln->val << "\"\n";
++    }
++  }
++  return oss;
++}
++
++void ConfFile::
++load_from_buffer(const char *buf, size_t sz, std::deque<std::string> *errors,
++		 std::ostream *warnings) // warnings may be NULL (checked before use); errors is dereferenced unconditionally
++{
++  errors->clear();
++
++  section_iter_t::value_type vt("global", ConfSection()); // lines that precede any section header go into "global"
++  pair < section_iter_t, bool > vr(sections.insert(vt));
++  assert(vr.second); // requires that no "global" section exists yet, e.g. right after clear()
++  section_iter_t cur_section = vr.first;
++  std::string acc; // accumulates one logical line across backslash continuations
++
++  const char *b = buf;
++  int line_no = 0;
++  size_t line_len = -1; // wraps around, so the first "line_len + 1" below is 0
++  size_t rem = sz;
++  while (1) {
++    b += line_len + 1; // step over the previous line and its '\n'
++    rem -= line_len + 1;
++    if (rem == 0)
++      break;
++    line_no++;
++
++    // look for the next newline
++    const char *end = (const char*)memchr(b, '\n', rem);
++    if (!end) {
++      ostringstream oss;
++      oss << "read_conf: ignoring line " << line_no << " because it doesn't "
++	  << "end with a newline! Please end the config file with a newline.";
++      errors->push_back(oss.str());
++      break;
++    }
++
++    // find length of line, and search for NULLs
++    line_len = 0;
++    bool found_null = false;
++    for (const char *tmp = b; tmp != end; ++tmp) {
++      line_len++;
++      if (*tmp == '\0') {
++	found_null = true;
++      }
++    }
++
++    if (found_null) {
++      ostringstream oss;
++      oss << "read_conf: ignoring line " << line_no << " because it has "
++	  << "an embedded null.";
++      errors->push_back(oss.str());
++      acc.clear(); // also drops any continuation text gathered so far
++      continue;
++    }
++
++    if ((line_len >= 1) && (b[line_len-1] == '\\')) {
++      // A backslash at the end of a line serves as a line continuation marker.
++      // Combine the next line with this one.
++      // Remove the backslash itself from the text.
++      acc.append(b, line_len - 1);
++      continue;
++    }
++
++    acc.append(b, line_len);
++
++    //cerr << "acc = '" << acc << "'" << std::endl;
++    ConfLine *cline = process_line(line_no, acc.c_str(), errors); // heap-allocated result, deleted at the end of this iteration
++    acc.clear();
++    if (!cline) // process_line() produced nothing to store for this line
++      continue;
++    const std::string &csection(cline->newsection);
++    if (!csection.empty()) {
++      std::map <std::string, ConfSection>::value_type nt(csection, ConfSection());
++      pair < section_iter_t, bool > nr(sections.insert(nt)); // insert() returns the existing entry when the section was seen before
++      cur_section = nr.first;
++    }
++    else {
++      if (cur_section->second.lines.count(*cline)) {
++	// replace an existing key/line in this section, so that
++	//  [mysection]
++	//    foo = 1
++	//    foo = 2
++	// will result in foo = 2.
++	cur_section->second.lines.erase(*cline);
++	if (cline->key.length() && warnings)
++	  *warnings << "warning: line " << line_no << ": '" << cline->key << "' in section '"
++		    << cur_section->first << "' redefined " << std::endl;
++      }
++      // add line to current section
++      //std::cerr << "cur_section = " << cur_section->first << ", " << *cline << std::endl;
++      cur_section->second.lines.insert(*cline); // stores a copy; the original is deleted below
++    }
++    delete cline;
++  }
++
++  if (!acc.empty()) { // the buffer ended in the middle of a continued line
++    ostringstream oss;
++    oss << "read_conf: don't end with lines that end in backslashes!";
++    errors->push_back(oss.str());
++  }
++}
++
++/*
++ * Parse one logical configuration line with a simple state-machine based
++ * parser.  This probably could/should be rewritten with something like
++ * boost::spirit or yacc if the grammar ever gets more complex.
++ *
++ * line_no - line number; used in error messages and handed to the ConfLine
++ *           constructor.
++ * line    - NUL-terminated text of the line.
++ * errors  - receives one message when the line is malformed.
++ *
++ * Returns a heap-allocated ConfLine that the caller must delete, or NULL
++ * when the line is empty / whitespace-only or malformed.  A section header
++ * yields a ConfLine with newsection set; a comment-only line yields one
++ * with an empty key.
++ *
++ * A backslash escapes the following character inside section names, keys
++ * and values, so '#', ';', '=', ']' and '"' can be taken literally.
++ */
++ConfLine* ConfFile::
++process_line(int line_no, const char *line, std::deque<std::string> *errors)
++{
++  enum acceptor_state_t {
++    ACCEPT_INIT,
++    ACCEPT_SECTION_NAME,
++    ACCEPT_KEY,
++    ACCEPT_VAL_START,
++    ACCEPT_UNQUOTED_VAL,
++    ACCEPT_QUOTED_VAL,
++    ACCEPT_COMMENT_START,
++    ACCEPT_COMMENT_TEXT,
++  };
++  const char *l = line;
++  acceptor_state_t state = ACCEPT_INIT;
++  string key, val, newsection, comment;
++  bool escaping = false;
++  while (true) {
++    // l is advanced past c here, so (l - line) is the 1-based position of c.
++    char c = *l++;
++    // isspace() has undefined behavior for negative arguments other than
++    // EOF, and plain char may be signed; always classify the unsigned value.
++    const bool c_is_space = isspace(static_cast<unsigned char>(c)) != 0;
++    switch (state) {
++      case ACCEPT_INIT:
++	if (c == '\0')
++	  return NULL; // blank line. Not an error, but not interesting either.
++	else if (c == '[')
++	  state = ACCEPT_SECTION_NAME;
++	else if ((c == '#') || (c == ';'))
++	  state = ACCEPT_COMMENT_TEXT;
++	else if (c == ']') {
++	  ostringstream oss;
++	  oss << "unexpected right bracket at char " << (l - line)
++	      << ", line " << line_no;
++	  errors->push_back(oss.str());
++	  return NULL;
++	}
++	else if (c_is_space) {
++	  // ignore whitespace here
++	}
++	else {
++	  // try to accept this character as a key
++	  state = ACCEPT_KEY;
++	  --l;
++	}
++	break;
++      case ACCEPT_SECTION_NAME:
++	if (c == '\0') {
++	  ostringstream oss;
++	  oss << "error parsing new section name: expected right bracket "
++	      << "at char " << (l - line) << ", line " << line_no;
++	  errors->push_back(oss.str());
++	  return NULL;
++	}
++	else if ((c == ']') && (!escaping)) {
++	  trim_whitespace(newsection, true);
++	  if (newsection.empty()) {
++	    ostringstream oss;
++	    oss << "error parsing new section name: no section name found? "
++	        << "at char " << (l - line) << ", line " << line_no;
++	    errors->push_back(oss.str());
++	    return NULL;
++	  }
++	  state = ACCEPT_COMMENT_START;
++	}
++	else if (((c == '#') || (c == ';')) && (!escaping)) {
++	  ostringstream oss;
++	  oss << "unexpected comment marker while parsing new section name, at "
++	      << "char " << (l - line) << ", line " << line_no;
++	  errors->push_back(oss.str());
++	  return NULL;
++	}
++	else if ((c == '\\') && (!escaping)) {
++	  escaping = true;
++	}
++	else {
++	  escaping = false;
++	  newsection += c;
++	}
++	break;
++      case ACCEPT_KEY:
++	if ((((c == '#') || (c == ';')) && (!escaping)) || (c == '\0')) {
++	  ostringstream oss;
++	  if (c == '\0') {
++	    oss << "end of key=val line " << line_no
++	        << " reached, no \"=val\" found...missing =?";
++	  } else {
++	    oss << "unexpected character while parsing putative key value, "
++		<< "at char " << (l - line) << ", line " << line_no;
++	  }
++	  errors->push_back(oss.str());
++	  return NULL;
++	}
++	else if ((c == '=') && (!escaping)) {
++	  key = normalize_key_name(key);
++	  if (key.empty()) {
++	    ostringstream oss;
++	    oss << "error parsing key name: no key name found? "
++	        << "at char " << (l - line) << ", line " << line_no;
++	    errors->push_back(oss.str());
++	    return NULL;
++	  }
++	  state = ACCEPT_VAL_START;
++	}
++	else if ((c == '\\') && (!escaping)) {
++	  escaping = true;
++	}
++	else {
++	  escaping = false;
++	  key += c;
++	}
++	break;
++      case ACCEPT_VAL_START:
++	if (c == '\0')
++	  return new ConfLine(key, val, newsection, comment, line_no);
++	else if ((c == '#') || (c == ';'))
++	  state = ACCEPT_COMMENT_TEXT;
++	else if (c == '"')
++	  state = ACCEPT_QUOTED_VAL;
++	else if (c_is_space) {
++	  // ignore whitespace
++	}
++	else {
++	  // try to accept character as a val
++	  state = ACCEPT_UNQUOTED_VAL;
++	  --l;
++	}
++	break;
++      case ACCEPT_UNQUOTED_VAL:
++	if (c == '\0') {
++	  if (escaping) {
++	    ostringstream oss;
++	    oss << "error parsing value name: unterminated escape sequence "
++	        << "at char " << (l - line) << ", line " << line_no;
++	    errors->push_back(oss.str());
++	    return NULL;
++	  }
++	  trim_whitespace(val, false);
++	  return new ConfLine(key, val, newsection, comment, line_no);
++	}
++	else if (((c == '#') || (c == ';')) && (!escaping)) {
++	  trim_whitespace(val, false);
++	  state = ACCEPT_COMMENT_TEXT;
++	}
++	else if ((c == '\\') && (!escaping)) {
++	  escaping = true;
++	}
++	else {
++	  escaping = false;
++	  val += c;
++	}
++	break;
++      case ACCEPT_QUOTED_VAL:
++	if (c == '\0') {
++	  ostringstream oss;
++	  oss << "found opening quote for value, but not the closing quote. "
++	      << "line " << line_no;
++	  errors->push_back(oss.str());
++	  return NULL;
++	}
++	else if ((c == '"') && (!escaping)) {
++	  state = ACCEPT_COMMENT_START;
++	}
++	else if ((c == '\\') && (!escaping)) {
++	  escaping = true;
++	}
++	else {
++	  escaping = false;
++	  // Add anything, including whitespace.
++	  val += c;
++	}
++	break;
++      case ACCEPT_COMMENT_START:
++	// Reached after a closing ']' or closing quote: only whitespace or a
++	// comment may follow.
++	if (c == '\0') {
++	  return new ConfLine(key, val, newsection, comment, line_no);
++	}
++	else if ((c == '#') || (c == ';')) {
++	  state = ACCEPT_COMMENT_TEXT;
++	}
++	else if (c_is_space) {
++	  // ignore whitespace
++	}
++	else {
++	  ostringstream oss;
++	  oss << "unexpected character at char " << (l - line) << " of line "
++	      << line_no;
++	  errors->push_back(oss.str());
++	  return NULL;
++	}
++	break;
++      case ACCEPT_COMMENT_TEXT:
++	if (c == '\0')
++	  return new ConfLine(key, val, newsection, comment, line_no);
++	else
++	  comment += c;
++	break;
++      default:
++	assert(0);
++	break;
++    }
++    assert(c != '\0'); // We better not go past the end of the input string.
++  }
++}
diff --cc src/dmclock/sim/src/ConfUtils.h
index 00000000000,00000000000..6c9c2c6c9c8
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/src/ConfUtils.h
@@@ -1,0 -1,0 +1,83 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++/*
++ * Ceph - scalable distributed file system
++ *
++ * Copyright (C) 2011 New Dream Network
++ *
++ * This is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License version 2.1, as published by the Free Software
++ * Foundation.  See file COPYING.
++ *
++ */
++
++#ifndef CEPH_CONFUTILS_H
++#define CEPH_CONFUTILS_H
++
++#include <deque>
++#include <map>
++#include <set>
++#include <string>
++
++/*
++ * Ceph configuration file support.
++ *
++ * This class loads an INI-style configuration from a file or bufferlist, and
++ * holds it in memory. In general, an INI configuration file is composed of
++ * sections, which contain key/value pairs. You can put comments on the end of
++ * lines by using either a hash mark (#) or the semicolon (;).
++ *
++ * You can get information out of ConfFile by calling get_key or by examining
++ * individual sections.
++ *
++ * This class could be extended to support modifying configuration files and
++ * writing them back out without too much difficulty. Currently, this is not
++ * implemented, and the file is read-only.
++ */
++// One parsed line of a configuration file: a key/value pair and/or the name
++// of the section that the line opens.
++class ConfLine {
++public:
++  // NOTE(review): comment_ and line_no_ are accepted here, but this class
++  // declares no members for them; the constructor is defined elsewhere --
++  // confirm what it does with those two arguments.
++  ConfLine(const std::string &key_, const std::string val_,
++	   const std::string newsection_, const std::string comment_, int line_no_);
++  // Ordering required by std::set<ConfLine> (see ConfSection::lines).
++  // NOTE(review): presumably compares by key -- confirm in the definition.
++  bool operator<(const ConfLine &rhs) const;
++  friend std::ostream &operator<<(std::ostream& oss, const ConfLine &l);
++
++  std::string key, val, newsection;
++};
++
++// The lines parsed for one section of a configuration file, kept in the
++// order defined by ConfLine::operator<.
++class ConfSection {
++public:
++  using const_line_iter_t = std::set<ConfLine>::const_iterator;
++
++  std::set<ConfLine> lines;
++};
++
++// In-memory view of an INI-style configuration file: a map from section
++// name to the ConfSection holding the lines parsed for that section.
++class ConfFile {
++public:
++  typedef std::map <std::string, ConfSection>::iterator section_iter_t;
++  typedef std::map <std::string, ConfSection>::const_iterator const_section_iter_t;
++
++  ConfFile();
++  ~ConfFile();
++  void clear();
++  // Parses the file named fname.  Returns an int status; the caller in
++  // config.cc treats any non-zero value as failure.  errors and warnings
++  // are out-parameters passed by pointer.
++  int parse_file(const std::string &fname, std::deque<std::string> *errors, std::ostream *warnings);
++  // Looks up key in section and delivers its text through val.  The callers
++  // in config.cc treat a return value of 0 as "found".
++  int read(const std::string &section, const std::string &key,
++	      std::string &val) const;
++
++  // Iteration over all sections, e.g. for printing.
++  const_section_iter_t sections_begin() const;
++  const_section_iter_t sections_end() const;
++
++  // NOTE(review): strip_internal presumably also collapses whitespace inside
++  // str rather than only at its ends -- confirm in the definition.
++  static void trim_whitespace(std::string &str, bool strip_internal);
++  // Returns a copy of key with whitespace trimmed and every remaining space
++  // replaced by '_'.
++  static std::string normalize_key_name(const std::string &key);
++  friend std::ostream &operator<<(std::ostream &oss, const ConfFile &cf);
++
++private:
++  // Parses sz bytes of configuration text at buf into `sections`.
++  void load_from_buffer(const char *buf, size_t sz,
++			std::deque<std::string> *errors, std::ostream *warnings);
++  // Parses a single line; returns a heap-allocated ConfLine or NULL.
++  static ConfLine* process_line(int line_no, const char *line,
++			        std::deque<std::string> *errors);
++
++  std::map <std::string, ConfSection> sections;
++};
++
++#endif
diff --cc src/dmclock/sim/src/config.cc
index 00000000000,00000000000..a6702897cd6
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/src/config.cc
@@@ -1,0 -1,0 +1,171 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++
++#include <unistd.h>
++#include <string.h>
++#include <stdarg.h>
++
++#include <iostream>
++#include <vector>
++#include <list>
++
++#include "config.h"
++#include "str_list.h"
++
++
++// Copy the option text in input to output, turning '-' into '_' in the
++// option name.  output must have room for strlen(input) + 1 characters.
++//
++// The first two characters are copied unchanged (so a leading "-" or "--"
++// survives), and everything from the first '=' after them onwards is
++// copied unchanged as well.
++static void dashes_to_underscores(const char *input, char *output) {
++  const char *src = input;
++  char *dst = output;
++
++  // first two characters are copied as-is; stop early on a shorter string
++  for (int copied = 0; copied < 2; ++copied) {
++    const char ch = *src++;
++    *dst++ = ch;
++    if (ch == '\0')
++      return;
++  }
++
++  while (*src != '\0') {
++    if (*src == '=') {
++      // the "=value" tail is taken verbatim, terminator included
++      strcpy(dst, src);
++      return;
++    }
++    *dst++ = (*src == '-') ? '_' : *src;
++    ++src;
++  }
++  *dst = '\0';
++}
++
++// Try to match the argument at *i against a NULL-terminated list of
++// candidate option spellings supplied in ap (each a char*).  '-' and '_'
++// are treated as the same character after the first two characters.
++//
++// Both "--opt=value" and "--opt value" are accepted.  On a match the
++// consumed entries are erased from args, i is set to the element that
++// followed them, and the value is stored in *ret.
++//
++// Returns 1 on a match, 0 when no candidate matches, and -EINVAL (after
++// writing a message to oss and erasing the option from args) when the option
++// is the last argument and therefore has no value.
++static int va_ceph_argparse_witharg(std::vector<const char*> &args,
++	std::vector<const char*>::iterator &i, std::string *ret,
++	std::ostream &oss, va_list ap) {
++  // Scratch buffers live in std::vector: variable-length arrays are not
++  // standard C++ and would put argument-sized data on the stack.
++  const char *first = *i;
++  std::vector<char> tmp(strlen(first) + 1);
++  dashes_to_underscores(first, tmp.data());
++  first = tmp.data();
++
++  // does this argument match any of the possibilities?
++  while (1) {
++    const char *a = va_arg(ap, char*);
++    if (a == NULL)
++      return 0;
++    const size_t strlen_a = strlen(a);
++    std::vector<char> a2(strlen_a + 1);
++    dashes_to_underscores(a, a2.data());
++    if (strncmp(a2.data(), first, strlen(a2.data())) == 0) {
++      if (first[strlen_a] == '=') {
++	// "--opt=value": the value is the text after the '='
++	*ret = first + strlen_a + 1;
++	i = args.erase(i);
++	return 1;
++      }
++      else if (first[strlen_a] == '\0') {
++	// find second part (or not)
++	if (i+1 == args.end()) {
++	  oss << "Option " << *i << " requires an argument." << std::endl;
++	  i = args.erase(i);
++	  return -EINVAL;
++	}
++	// "--opt value": drop the option, then take and drop the value
++	i = args.erase(i);
++	*ret = *i;
++	i = args.erase(i);
++	return 1;
++      }
++      // otherwise the candidate is only a prefix of a longer option name;
++      // keep trying the remaining candidates
++    }
++  }
++}
++
++// Variadic front end for va_ceph_argparse_witharg(): the arguments after ret
++// are the candidate option spellings, terminated by a NULL char*.  Parse
++// errors are reported on std::cerr and end the process with _exit(1).
++// Returns true when the argument at i matched and *ret was filled in.
++bool crimson::qos_simulation::ceph_argparse_witharg(std::vector<const char*> &args,
++	std::vector<const char*>::iterator &i, std::string *ret, ...) {
++  va_list candidates;
++  va_start(candidates, ret);
++  const int status =
++    va_ceph_argparse_witharg(args, i, ret, std::cerr, candidates);
++  va_end(candidates);
++
++  if (status < 0) {
++    _exit(1);
++  }
++  return status != 0;
++}
++
++// Scan args for "--conf <file>" / "-c <file>" (or the "=<file>" forms),
++// remove every occurrence from args and store the value in
++// *conf_file_list.  When the option appears more than once the last value
++// wins; all other arguments are left in place.
++void crimson::qos_simulation::ceph_argparse_early_args(std::vector<const char*>& args, std::string *conf_file_list) {
++  std::string val;
++
++  for (std::vector<const char*>::iterator i = args.begin(); i != args.end(); ) {
++    // on a match ceph_argparse_witharg() erases the consumed entries and
++    // re-seats i, so i is only advanced by hand when nothing matched
++    if (ceph_argparse_witharg(args, i, &val, "--conf", "-c", (char*)NULL)) {
++      *conf_file_list = val;
++    }
++    else {
++      // ignore
++      ++i;
++    }
++  }
++}
++
++// Interpret v as a boolean: true for "true" in any letter case and for any
++// text that atoi() converts to a non-zero number; false otherwise,
++// including for the empty string.
++static bool stobool(const std::string & v) {
++  if (v.empty())
++    return false;
++  const char *text = v.c_str();
++  if (strcasecmp(text, "true") == 0)
++    return true;
++  return atoi(text) != 0;
++}
++
++// Load the simulation settings stored in the INI file fname into g_conf.
++//
++// Top-level settings are read from [global].  One srv_group_t is appended
++// to g_conf.srv_group for each N below g_conf.server_groups, filled from
++// section [server.N]; likewise one cli_group_t per [client.N].  A key that
++// cannot be read leaves the corresponding field at the value it already
++// has (for the per-group records, the struct's default).
++//
++// Returns the non-zero status of ConfFile::parse_file on failure, else 0.
++int crimson::qos_simulation::parse_config_file(const std::string &fname, sim_config_t &g_conf) {
++  ConfFile cf;
++  std::deque<std::string> err;
++  std::ostringstream warn;
++  const int ret = cf.parse_file(fname.c_str(), &err, &warn);
++  if (ret != 0) {
++    // error
++    return ret;
++  }
++
++  std::string val;
++  // true when ConfFile::read reports success (0); the text is then in val
++  auto have = [&cf, &val](const std::string &section, const char *key) {
++    return cf.read(section, key, val) == 0;
++  };
++
++  if (have("global", "server_groups"))
++    g_conf.server_groups = std::stoul(val);
++  if (have("global", "client_groups"))
++    g_conf.client_groups = std::stoul(val);
++  if (have("global", "server_random_selection"))
++    g_conf.server_random_selection = stobool(val);
++  if (have("global", "server_soft_limit"))
++    g_conf.server_soft_limit = stobool(val);
++
++  for (uint idx = 0; idx < g_conf.server_groups; ++idx) {
++    const std::string section = "server." + std::to_string(idx);
++    srv_group_t group;
++    if (have(section, "server_count"))
++      group.server_count = std::stoul(val);
++    if (have(section, "server_iops"))
++      group.server_iops = std::stoul(val);
++    if (have(section, "server_threads"))
++      group.server_threads = std::stoul(val);
++    g_conf.srv_group.push_back(group);
++  }
++
++  for (uint idx = 0; idx < g_conf.client_groups; ++idx) {
++    const std::string section = "client." + std::to_string(idx);
++    cli_group_t group;
++    if (have(section, "client_count"))
++      group.client_count = std::stoul(val);
++    if (have(section, "client_wait"))
++      group.client_wait = std::chrono::seconds(std::stoul(val));
++    if (have(section, "client_total_ops"))
++      group.client_total_ops = std::stoul(val);
++    if (have(section, "client_server_select_range"))
++      group.client_server_select_range = std::stoul(val);
++    if (have(section, "client_iops_goal"))
++      group.client_iops_goal = std::stoul(val);
++    if (have(section, "client_outstanding_ops"))
++      group.client_outstanding_ops = std::stoul(val);
++    if (have(section, "client_reservation"))
++      group.client_reservation = std::stod(val);
++    if (have(section, "client_limit"))
++      group.client_limit = std::stod(val);
++    if (have(section, "client_weight"))
++      group.client_weight = std::stod(val);
++    g_conf.cli_group.push_back(group);
++  }
++
++  return 0;
++}
diff --cc src/dmclock/sim/src/config.h
index 00000000000,00000000000..010f33a743e
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/src/config.h
@@@ -1,0 -1,0 +1,138 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++
++#pragma once
++
++
++#include <string.h>
++
++#include <chrono>
++#include <vector>
++#include <sstream>
++#include <iomanip>
++
++#include "ConfUtils.h"
++
++
++namespace crimson {
++  namespace qos_simulation {
++
++    // Settings shared by one group of simulated clients.
++    struct cli_group_t {
++      uint client_count;
++      std::chrono::seconds client_wait;
++      uint client_total_ops;
++      uint client_server_select_range;
++      uint client_iops_goal;
++      uint client_outstanding_ops;
++      double client_reservation;
++      double client_limit;
++      double client_weight;
++
++      // Every argument has a default, so cli_group_t() yields a usable
++      // group.  _client_wait is given in whole seconds.
++      cli_group_t(uint _client_count = 100,
++		  uint _client_wait = 0,
++		  uint _client_total_ops = 1000,
++		  uint _client_server_select_range = 10,
++		  uint _client_iops_goal = 50,
++		  uint _client_outstanding_ops = 100,
++		  double _client_reservation = 20.0,
++		  double _client_limit = 60.0,
++		  double _client_weight = 1.0) :
++	client_count(_client_count),
++	client_wait(std::chrono::seconds(_client_wait)),
++	client_total_ops(_client_total_ops),
++	client_server_select_range(_client_server_select_range),
++	client_iops_goal(_client_iops_goal),
++	client_outstanding_ops(_client_outstanding_ops),
++	client_reservation(_client_reservation),
++	client_limit(_client_limit),
++	client_weight(_client_weight)
++      {
++	// empty
++      }
++
++      // Prints one "name = value" line per field (no newline after the
++      // last); the three doubles are shown in fixed notation with one
++      // decimal.  The stream's own floating-point formatting is put back
++      // afterwards so that later output on `out` is not affected.
++      friend std::ostream& operator<<(std::ostream& out,
++	  const cli_group_t& cli_group) {
++	const std::ios_base::fmtflags saved_flags = out.flags();
++	const std::streamsize saved_precision = out.precision();
++	out <<
++	  "client_count = " << cli_group.client_count << "\n" <<
++	  "client_wait = " << cli_group.client_wait.count() << "\n" <<
++	  "client_total_ops = " << cli_group.client_total_ops << "\n" <<
++	  "client_server_select_range = " << cli_group.client_server_select_range << "\n" <<
++	  "client_iops_goal = " << cli_group.client_iops_goal << "\n" <<
++	  "client_outstanding_ops = " << cli_group.client_outstanding_ops << "\n" <<
++	  std::fixed << std::setprecision(1) <<
++	  "client_reservation = " << cli_group.client_reservation << "\n" <<
++	  "client_limit = " << cli_group.client_limit << "\n" <<
++	  "client_weight = " << cli_group.client_weight;
++	out.flags(saved_flags);
++	out.precision(saved_precision);
++	return out;
++      }
++    }; // class cli_group_t
++
++
++    // Settings shared by one group of simulated servers.
++    struct srv_group_t {
++      uint server_count;
++      uint server_iops;
++      uint server_threads;
++
++      // every argument has a default, so srv_group_t() is a usable group
++      srv_group_t(uint _server_count = 100,
++		  uint _server_iops = 40,
++		  uint _server_threads = 1) :
++	server_count(_server_count),
++	server_iops(_server_iops),
++	server_threads(_server_threads)
++      {}
++
++      // prints one "name = value" line per field, no newline after the last
++      friend std::ostream& operator<<(std::ostream& out,
++	  const srv_group_t& srv_group) {
++	out << "server_count = " << srv_group.server_count << "\n";
++	out << "server_iops = " << srv_group.server_iops << "\n";
++	out << "server_threads = " << srv_group.server_threads;
++	return out;
++      }
++    }; // class srv_group_t
++
++
++    // Top-level simulation settings together with the per-group records
++    // for clients and servers.
++    struct sim_config_t {
++      uint server_groups;
++      uint client_groups;
++      bool server_random_selection;
++      bool server_soft_limit;
++
++      std::vector<cli_group_t> cli_group;
++      std::vector<srv_group_t> srv_group;
++
++      // Stores the settings and reserves room for the announced number of
++      // groups; the group vectors themselves start out empty.
++      sim_config_t(uint _server_groups = 1,
++		   uint _client_groups = 1,
++		   bool _server_random_selection = false,
++		   bool _server_soft_limit = true) :
++	server_groups(_server_groups),
++	client_groups(_client_groups),
++	server_random_selection(_server_random_selection),
++	server_soft_limit(_server_soft_limit)
++      {
++	srv_group.reserve(server_groups);
++	cli_group.reserve(client_groups);
++      }
++
++      // Prints the four scalar settings, one "name = value" line each with
++      // no newline after the last.  The group vectors are not printed.
++      friend std::ostream& operator<<(std::ostream& out,
++	  const sim_config_t& sim_config) {
++	out << "server_groups = " << sim_config.server_groups << "\n";
++	out << "client_groups = " << sim_config.client_groups << "\n";
++	out << "server_random_selection = " << sim_config.server_random_selection << "\n";
++	out << "server_soft_limit = " << sim_config.server_soft_limit;
++	return out;
++      }
++    }; // class sim_config_t
++
++
++    // Matches the argument at i against the NULL-terminated list of option
++    // spellings passed after ret; on a match the consumed entries are
++    // erased from args, i is re-seated and the option's value is stored in
++    // *ret.  Returns true on a match.
++    bool ceph_argparse_witharg(std::vector<const char*> &args,
++	std::vector<const char*>::iterator &i, std::string *ret, ...);
++    // Removes "--conf" / "-c" (and their values) from args and stores the
++    // value in *conf_file_list.
++    void ceph_argparse_early_args(std::vector<const char*>& args, std::string *conf_file_list);
++    // Fills g_conf from the INI file fname; returns 0 on success and the
++    // non-zero status of the file parser otherwise.
++    int parse_config_file(const std::string &fname, sim_config_t &g_conf);
++
++  }; // namespace qos_simulation
++}; // namespace crimson
diff --cc src/dmclock/sim/src/sim_client.h
index 00000000000,00000000000..6538dab2c08
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/src/sim_client.h
@@@ -1,0 -1,0 +1,329 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#pragma once
++
++
++#include <atomic>
++#include <mutex>
++#include <condition_variable>
++#include <thread>
++#include <chrono>
++#include <vector>
++#include <deque>
++#include <iostream>
++
++#include "sim_recs.h"
++
++
++namespace crimson {
++  namespace qos_simulation {
++
++    // Tag types (and ready-made tag values) that select which CliInst
++    // constructor is used.
++    struct req_op_t {};
++    struct wait_op_t {};
++    constexpr req_op_t req_op{};
++    constexpr wait_op_t wait_op{};
++
++
++    enum class CliOp { req, wait };
++
++    // One instruction for a simulated client: either "wait for a while" or
++    // "issue a batch of requests".  `op` says which member of `args` is
++    // the one that was filled in.
++    struct CliInst {
++      CliOp op;
++      union {
++	std::chrono::milliseconds wait_time;
++	struct {
++	  uint32_t count;
++	  std::chrono::microseconds time_bw_reqs;
++	  uint16_t max_outstanding;
++	} req_params;
++      } args;
++
++      // Wait instruction.  D is a duration type; the duration is stored
++      // converted to milliseconds.
++      template<typename D>
++      CliInst(wait_op_t, D duration) :
++	op(CliOp::wait)
++      {
++	using std::chrono::milliseconds;
++	args.wait_time = std::chrono::duration_cast<milliseconds>(duration);
++      }
++
++      // Request instruction: issue `count` requests at ops_per_sec, with at
++      // most max_outstanding unanswered at a time.  The spacing between
++      // requests is 1/ops_per_sec, rounded to the nearest microsecond.
++      CliInst(req_op_t,
++	      uint32_t count, double ops_per_sec, uint16_t max_outstanding) :
++	op(CliOp::req)
++      {
++	const double period_us = 1.0 / ops_per_sec * 1000000;
++	const uint32_t rounded_us = uint32_t(0.5 + period_us);
++	args.req_params.count = count;
++	args.req_params.time_bw_reqs = std::chrono::microseconds(rounded_us);
++	args.req_params.max_outstanding = max_outstanding;
++      }
++    };
++
++
++    // Callback that picks the server for a request; SimulatedClient passes
++    // the request's index within its instruction as the seed.
++    using ServerSelectFunc = std::function<const ServerId&(uint64_t seed)>;
++
++
++    // A simulated client.  One thread (run_req) works through a list of
++    // CliInst instructions and submits requests; a second thread (run_resp)
++    // consumes the responses queued by receive_response().  Both threads
++    // are started by the constructor and joined by wait_until_done(), which
++    // the destructor also calls.
++    //
++    // SvcTrk - service tracker; must offer get_req_params(server) and
++    //          track_resp(server, resp_params)
++    // ReqPm  - per-request parameters handed to the submit function
++    // RespPm - per-response parameters received with each response
++    // Accum  - accumulator type updated through ClientAccumFunc
++    template<typename SvcTrk, typename ReqPm, typename RespPm, typename Accum>
++    class SimulatedClient {
++    public:
++
++      // Time spent in, and number of calls to, the two service tracker
++      // entry points.  Updates are made under mtx.
++      struct InternalStats {
++	std::mutex mtx;
++	std::chrono::nanoseconds track_resp_time;
++	std::chrono::nanoseconds get_req_params_time;
++	uint32_t track_resp_count;
++	uint32_t get_req_params_count;
++
++	InternalStats() :
++	  track_resp_time(0),
++	  get_req_params_time(0),
++	  track_resp_count(0),
++	  get_req_params_count(0)
++	{
++	  // empty
++	}
++      };
++
++      // hands a request to a server
++      using SubmitFunc =
++	std::function<void(const ServerId&,
++			   const TestRequest&,
++			   const ClientId&,
++			   const ReqPm&)>;
++
++      // folds the parameters of one response into the accumulator
++      using ClientAccumFunc = std::function<void(Accum&,const RespPm&)>;
++
++      typedef std::chrono::time_point<std::chrono::steady_clock> TimePoint;
++
++      static TimePoint now() { return std::chrono::steady_clock::now(); }
++
++    protected:
++
++      struct RespQueueItem {
++	TestResponse response;
++	ServerId     server_id;
++	RespPm       resp_params;
++      };
++
++      const ClientId id;
++      const SubmitFunc submit_f;
++      const ServerSelectFunc server_select_f;
++      const ClientAccumFunc accum_f;
++
++      std::vector<CliInst> instructions;
++
++      SvcTrk service_tracker;
++
++      // TODO: use lock rather than atomic???
++      std::atomic_ulong        outstanding_ops;
++      std::atomic_bool         requests_complete;
++
++      std::deque<RespQueueItem> resp_queue;
++
++      // mtx_req/cv_req: run_req waits here for outstanding_ops to drop
++      std::mutex               mtx_req;
++      std::condition_variable  cv_req;
++
++      // mtx_resp/cv_resp: protect and signal resp_queue
++      std::mutex               mtx_resp;
++      std::condition_variable  cv_resp;
++
++      using RespGuard = std::lock_guard<decltype(mtx_resp)>;
++      using Lock = std::unique_lock<std::mutex>;
++
++      // data collection
++
++      std::vector<TimePoint>   op_times;
++      Accum                    accumulator;
++      InternalStats            internal_stats;
++
++      std::thread              thd_req;
++      std::thread              thd_resp;
++
++    public:
++
++      // Stores the callbacks and instruction list, reserves room in
++      // op_times for the total number of requests, and starts both threads.
++      SimulatedClient(ClientId _id,
++		      const SubmitFunc& _submit_f,
++		      const ServerSelectFunc& _server_select_f,
++		      const ClientAccumFunc& _accum_f,
++		      const std::vector<CliInst>& _instrs) :
++	id(_id),
++	submit_f(_submit_f),
++	server_select_f(_server_select_f),
++	accum_f(_accum_f),
++	instructions(_instrs),
++	service_tracker(),
++	outstanding_ops(0),
++	requests_complete(false)
++      {
++	size_t op_count = 0;
++	for (const auto& i : instructions) {
++	  if (CliOp::req == i.op) {
++	    op_count += i.args.req_params.count;
++	  }
++	}
++	op_times.reserve(op_count);
++
++	thd_resp = std::thread(&SimulatedClient::run_resp, this);
++	thd_req = std::thread(&SimulatedClient::run_req, this);
++      }
++
++
++      // Convenience form: a single request instruction.
++      SimulatedClient(ClientId _id,
++		      const SubmitFunc& _submit_f,
++		      const ServerSelectFunc& _server_select_f,
++		      const ClientAccumFunc& _accum_f,
++		      uint16_t _ops_to_run,
++		      double _iops_goal,
++		      uint16_t _outstanding_ops_allowed) :
++	SimulatedClient(_id,
++			_submit_f, _server_select_f, _accum_f,
++			{{req_op, _ops_to_run, _iops_goal, _outstanding_ops_allowed}})
++      {
++	// empty
++      }
++
++
++      SimulatedClient(const SimulatedClient&) = delete;
++      SimulatedClient(SimulatedClient&&) = delete;
++      SimulatedClient& operator=(const SimulatedClient&) = delete;
++      SimulatedClient& operator=(SimulatedClient&&) = delete;
++
++      virtual ~SimulatedClient() {
++	wait_until_done();
++      }
++
++      // Queues a response for run_resp and wakes it.
++      void receive_response(const TestResponse& resp,
++			    const ServerId& server_id,
++			    const RespPm& resp_params) {
++	RespGuard g(mtx_resp);
++	resp_queue.push_back(RespQueueItem{resp, server_id, resp_params});
++	cv_resp.notify_one();
++      }
++
++      const std::vector<TimePoint>& get_op_times() const { return op_times; }
++
++      // Joins both worker threads; safe to call more than once.
++      void wait_until_done() {
++	if (thd_req.joinable()) thd_req.join();
++	if (thd_resp.joinable()) thd_resp.join();
++      }
++
++      const Accum& get_accumulator() const { return accumulator; }
++
++      const InternalStats& get_internal_stats() const { return internal_stats; }
++
++    protected:
++
++      // Request thread: executes the instructions in order.  For a request
++      // instruction it submits `count` requests, never letting more than
++      // max_outstanding be unanswered and spacing submissions by
++      // time_bw_reqs.  Sets requests_complete when everything is submitted.
++      void run_req() {
++	for (const auto& i : instructions) {
++	  if (CliOp::wait == i.op) {
++	    std::this_thread::sleep_for(i.args.wait_time);
++	  } else if (CliOp::req == i.op) {
++	    Lock l(mtx_req);
++	    for (uint64_t o = 0; o < i.args.req_params.count; ++o) {
++	      while (outstanding_ops >= i.args.req_params.max_outstanding) {
++		cv_req.wait(l);
++	      }
++
++	      // the submission itself runs without mtx_req held
++	      l.unlock();
++	      auto now = std::chrono::steady_clock::now();
++	      const ServerId& server = server_select_f(o);
++
++	      ReqPm rp =
++		time_stats_w_return<decltype(internal_stats.get_req_params_time),
++				    ReqPm>(internal_stats.mtx,
++					   internal_stats.get_req_params_time,
++					   [&]() -> ReqPm {
++					     return service_tracker.get_req_params(server);
++					   });
++	      count_stats(internal_stats.mtx,
++			  internal_stats.get_req_params_count);
++
++	      TestRequest req(server, o, 12);
++	      submit_f(server, req, id, rp);
++	      ++outstanding_ops;
++	      l.lock(); // lock for return to top of loop
++
++	      // pace the requests: do not continue before delay_time, even
++	      // if cv_req is notified earlier
++	      auto delay_time = now + i.args.req_params.time_bw_reqs;
++	      while (std::chrono::steady_clock::now() < delay_time) {
++		cv_req.wait_until(l, delay_time);
++	      } // while
++	    } // for
++	  } else {
++	    assert(false);
++	  }
++	} // for loop
++
++	requests_complete = true;
++
++	// all requests made, thread ends
++      }
++
++
++      // Response thread: processes queued responses until all requests
++      // have been submitted and every one of them has been answered.
++      void run_resp() {
++	std::chrono::milliseconds delay(1000);
++
++	Lock l(mtx_resp);
++
++	// since the following code would otherwise be repeated (except for
++	// the call to notify_one) in the two loops below; let's avoid
++	// repetition and define it once.
++	const auto proc_resp = [this, &l](const bool notify_req_cv) {
++	  if (!resp_queue.empty()) {
++	    RespQueueItem item = resp_queue.front();
++	    resp_queue.pop_front();
++
++	    l.unlock();
++
++	    // data collection
++
++	    op_times.push_back(now());
++	    accum_f(accumulator, item.resp_params);
++
++	    // processing
++
++	    time_stats(internal_stats.mtx,
++		       internal_stats.track_resp_time,
++		       [&](){
++			 service_tracker.track_resp(item.server_id, item.resp_params);
++		       });
++	    count_stats(internal_stats.mtx,
++			internal_stats.track_resp_count);
++
++	    // Change the value run_req() waits on while holding its mutex.
++	    // Otherwise the decrement and the notify could both happen
++	    // between run_req's test of outstanding_ops and its call to
++	    // cv_req.wait(), and the wake-up would be lost.
++	    {
++	      std::lock_guard<std::mutex> req_guard(mtx_req);
++	      --outstanding_ops;
++	    }
++	    if (notify_req_cv) {
++	      cv_req.notify_one();
++	    }
++
++	    l.lock();
++	  }
++	};
++
++	// phase 1: requests are still being submitted
++	while(!requests_complete.load()) {
++	  while(resp_queue.empty() && !requests_complete.load()) {
++	    cv_resp.wait_for(l, delay);
++	  }
++	  proc_resp(true);
++	}
++
++	// phase 2: drain the responses that are still owed
++	while(outstanding_ops.load() > 0) {
++	  while(resp_queue.empty() && outstanding_ops.load() > 0) {
++	    cv_resp.wait_for(l, delay);
++	  }
++	  proc_resp(false); // don't call notify_one as all requests are complete
++	}
++
++	// all responses received, thread ends
++      }
++    }; // class SimulatedClient
++
++
++  }; // namespace qos_simulation
++}; // namespace crimson
diff --cc src/dmclock/sim/src/sim_recs.h
index 00000000000,00000000000..b64750db4af
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/src/sim_recs.h
@@@ -1,0 -1,0 +1,121 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#pragma once
++
++
++#include <stdint.h>
++#include <stdlib.h>
++#include <assert.h>
++#include <signal.h>
++
++#include <sys/time.h>
++
++#include <cmath>
++#include <limits>
++#include <string>
++#include <mutex>
++#include <iostream>
++
++
++// Identifiers for simulated clients and servers are plain unsigned ints.
++using ClientId = uint;
++using ServerId = uint;
++
++
++namespace crimson {
++  namespace qos_simulation {
++
++    // Sends SIGCONT to the calling process.
++    // NOTE(review): presumably meant as a convenient place to stop in a
++    // debugger -- confirm the intent with the author.
++    inline void debugger() {
++      ::raise(SIGCONT);
++    }
++
++    // Run code() and add the steady-clock time it took, converted to the
++    // duration type T, to time_accumulate.  Only the update of
++    // time_accumulate is done under mtx; code() itself runs unlocked.
++    template<typename T>
++    void time_stats(std::mutex& mtx,
++		    T& time_accumulate,
++		    std::function<void()> code) {
++      using clock = std::chrono::steady_clock;
++      const auto started = clock::now();
++      code();
++      const auto finished = clock::now();
++      const auto elapsed = std::chrono::duration_cast<T>(finished - started);
++      std::lock_guard<std::mutex> guard(mtx);
++      time_accumulate += elapsed;
++    }
++
++    // Like time_stats(), but also hands back whatever code() returns.
++    // The compiler usually cannot deduce R from a lambda argument, so
++    // callers may have to spell out both template parameters.
++    template<typename T, typename R>
++    R time_stats_w_return(std::mutex& mtx,
++			  T& time_accumulate,
++			  std::function<R()> code) {
++      using clock = std::chrono::steady_clock;
++      const auto started = clock::now();
++      R result = code();
++      const auto finished = clock::now();
++      const auto elapsed = std::chrono::duration_cast<T>(finished - started);
++      std::lock_guard<std::mutex> guard(mtx);
++      time_accumulate += elapsed;
++      return result;
++    }
++
++    // Increment counter by one while holding mtx.
++    template<typename T>
++    void count_stats(std::mutex& mtx,
++		     T& counter) {
++      const std::lock_guard<std::mutex> guard(mtx);
++      ++counter;
++    }
++
++    // Payload of a simulated request.
++    struct TestRequest {
++      ServerId server; // allows debugging
++      uint32_t epoch;
++      uint32_t op;
++
++      TestRequest(ServerId _server,
++		  uint32_t _epoch,
++		  uint32_t _op) :
++	server(_server), epoch(_epoch), op(_op)
++      {}
++
++      // copies every field of r
++      TestRequest(const TestRequest& r) :
++	server(r.server), epoch(r.epoch), op(r.op)
++      {}
++    }; // struct TestRequest
++
++
++    // Payload of a simulated response; printed as "{ epoch:N }".
++    struct TestResponse {
++      uint32_t epoch;
++
++      TestResponse(uint32_t _epoch) : epoch(_epoch) {}
++
++      TestResponse(const TestResponse& r) : epoch(r.epoch) {}
++
++      friend std::ostream& operator<<(std::ostream& out, const TestResponse& resp) {
++	return out << "{ " << "epoch:" << resp.epoch << " }";
++      }
++    }; // class TestResponse
++
++  }; // namespace qos_simulation
++}; // namespace crimson
diff --cc src/dmclock/sim/src/sim_server.h
index 00000000000,00000000000..a61cc3204e4
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/src/sim_server.h
@@@ -1,0 -1,0 +1,225 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#pragma once
++
++
++#include <thread>
++#include <mutex>
++#include <condition_variable>
++#include <chrono>
++#include <deque>
++
++#include "sim_recs.h"
++
++
++namespace crimson {
++  namespace qos_simulation {
++
++    template<typename Q, typename ReqPm, typename RespPm, typename Accum>
++    class SimulatedServer {
++
++      struct QueueItem {
++	ClientId                     client;
++	std::unique_ptr<TestRequest> request;
++	RespPm                       additional;
++
++	QueueItem(const ClientId&                _client,
++		  std::unique_ptr<TestRequest>&& _request,
++		  const RespPm&                  _additional) :
++	  client(_client),
++	  request(std::move(_request)),
++	  additional(_additional)
++	{
++	  // empty
++	}
++      }; // QueueItem
++
++    public:
++
++      struct InternalStats {
++	std::mutex mtx;
++	std::chrono::nanoseconds add_request_time;
++	std::chrono::nanoseconds request_complete_time;
++	uint32_t add_request_count;
++	uint32_t request_complete_count;
++
++	InternalStats() :
++	  add_request_time(0),
++	  request_complete_time(0),
++	  add_request_count(0),
++	  request_complete_count(0)
++	{
++	  // empty
++	}
++      };
++
++      using ClientRespFunc = std::function<void(ClientId,
++						const TestResponse&,
++						const ServerId&,
++						const RespPm&)>;
++
++      using ServerAccumFunc = std::function<void(Accum& accumulator,
++						 const RespPm& additional)>;
++
++    protected:
++
++      const ServerId                 id;
++      Q*                             priority_queue;
++      ClientRespFunc                 client_resp_f;
++      int                            iops;
++      size_t                         thread_pool_size;
++
++      bool                           finishing;
++      std::chrono::microseconds      op_time;
++
++      std::mutex                     inner_queue_mtx;
++      std::condition_variable        inner_queue_cv;
++      std::deque<QueueItem>          inner_queue;
++
++      std::thread*                   threads;
++
++      using InnerQGuard = std::lock_guard<decltype(inner_queue_mtx)>;
++      using Lock = std::unique_lock<std::mutex>;
++
++      // data collection
++
++      ServerAccumFunc accum_f;
++      Accum accumulator;
++
++      InternalStats internal_stats;
++
++    public:
++
++      using CanHandleRequestFunc = std::function<bool(void)>;
++      using HandleRequestFunc =
++	std::function<void(const ClientId&,std::unique_ptr<TestRequest>,const RespPm&)>;
++      using CreateQueueF = std::function<Q*(CanHandleRequestFunc,HandleRequestFunc)>;
++					
++
++      SimulatedServer(ServerId _id,
++		      int _iops,
++		      size_t _thread_pool_size,
++		      const ClientRespFunc& _client_resp_f,
++		      const ServerAccumFunc& _accum_f,
++		      CreateQueueF _create_queue_f) :
++	id(_id),
++	priority_queue(_create_queue_f(std::bind(&SimulatedServer::has_avail_thread,
++						 this),
++				       std::bind(&SimulatedServer::inner_post,
++						 this,
++						 std::placeholders::_1,
++						 std::placeholders::_2,
++						 std::placeholders::_3))),
++	client_resp_f(_client_resp_f),
++	iops(_iops),
++	thread_pool_size(_thread_pool_size),
++	finishing(false),
++	accum_f(_accum_f)
++      {
++	op_time =
++	  std::chrono::microseconds((int) (0.5 +
++					   thread_pool_size * 1000000.0 / iops));
++	std::chrono::milliseconds delay(1000);
++	threads = new std::thread[thread_pool_size];
++	for (size_t i = 0; i < thread_pool_size; ++i) {
++	  threads[i] = std::thread(&SimulatedServer::run, this, delay);
++	}
++      }
++
++      virtual ~SimulatedServer() { // stops the worker pool, then frees what the ctor allocated
++	Lock l(inner_queue_mtx);
++	finishing = true;
++	inner_queue_cv.notify_all();
++	l.unlock();
++	// workers drain inner_queue and leave run() once finishing is set
++	for (size_t i = 0; i < thread_pool_size; ++i) {
++	  threads[i].join();
++	}
++	delete[] threads;
++	delete priority_queue; // from _create_queue_f; freed only after workers stop calling into it
++      }
++
++      void post(const TestRequest& request,
++		const ClientId& client_id,
++		const ReqPm& req_params)
++      {
++	time_stats(internal_stats.mtx,
++		   internal_stats.add_request_time,
++		   [&](){
++		     priority_queue->add_request(request, client_id, req_params);
++		   });
++	count_stats(internal_stats.mtx,
++		    internal_stats.add_request_count);
++      }
++
++      bool has_avail_thread() {
++	InnerQGuard g(inner_queue_mtx);
++	return inner_queue.size() <= thread_pool_size;
++      }
++
++      const Accum& get_accumulator() const { return accumulator; }
++      const Q& get_priority_queue() const { return *priority_queue; }
++      const InternalStats& get_internal_stats() const { return internal_stats; }
++
++    protected:
++
++      void inner_post(const ClientId& client,
++		      std::unique_ptr<TestRequest> request,
++		      const RespPm& additional) {
++	Lock l(inner_queue_mtx);
++	assert(!finishing);
++	accum_f(accumulator, additional);
++	inner_queue.emplace_back(QueueItem(client,
++					   std::move(request),
++					   additional));
++	inner_queue_cv.notify_one();
++      }
++
++      void run(std::chrono::milliseconds check_period) {
++	Lock l(inner_queue_mtx);
++	while(true) {
++	  while(inner_queue.empty() && !finishing) {
++	    inner_queue_cv.wait_for(l, check_period);
++	  }
++	  if (!inner_queue.empty()) {
++	    auto& front = inner_queue.front();
++	    auto client = front.client;
++	    auto req = std::move(front.request);
++	    auto additional = front.additional;
++	    inner_queue.pop_front();
++
++	    l.unlock();
++
++	    // simulation operation by sleeping; then call function to
++	    // notify server of completion
++	    std::this_thread::sleep_for(op_time);
++
++	    TestResponse resp(req->epoch);
++	    // TODO: rather than assuming this constructor exists, perhaps
++	    // pass in a function that does this mapping?
++	    client_resp_f(client, resp, id, additional);
++
++	    time_stats(internal_stats.mtx,
++		       internal_stats.request_complete_time,
++		       [&](){
++			 priority_queue->request_completed();
++		       });
++	    count_stats(internal_stats.mtx,
++			internal_stats.request_complete_count);
++
++	    l.lock(); // in prep for next iteration of loop
++	  } else {
++	    break;
++	  }
++	}
++      }
++    }; // class SimulatedServer
++
++  }; // namespace qos_simulation
++}; // namespace crimson
diff --cc src/dmclock/sim/src/simulate.h
index 00000000000,00000000000..18e752d8a35
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/src/simulate.h
@@@ -1,0 -1,0 +1,430 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#pragma once
++
++
++#include <assert.h>
++
++#include <memory>
++#include <chrono>
++#include <map>
++#include <random>
++#include <iostream>
++#include <iomanip>
++#include <string>
++
++
++namespace crimson {
++  namespace qos_simulation {
++
++    template<typename ServerId, typename ClientId, typename TS, typename TC>
++    class Simulation {
++  
++    public:
++
++      using TimePoint = std::chrono::time_point<std::chrono::steady_clock>;
++
++    protected:
++
++      using ClientMap = std::map<ClientId,TC*>;
++      using ServerMap = std::map<ServerId,TS*>;
++
++      uint server_count = 0;
++      uint client_count = 0;
++
++      ServerMap servers;
++      ClientMap clients;
++      std::vector<ServerId> server_ids;
++
++      TimePoint early_time;
++      TimePoint servers_created_time;
++      TimePoint clients_created_time;
++      TimePoint clients_finished_time;
++      TimePoint late_time;
++
++      std::default_random_engine prng;
++
++      bool has_run = false;
++
++
++    public:
++
++      double fmt_tp(const TimePoint& t) {
++	auto c = t.time_since_epoch().count();
++	return uint64_t(c / 1000000.0 + 0.5) % 100000 / 1000.0;
++      }
++
++      TimePoint now() {
++	return std::chrono::steady_clock::now();
++      }
++
++      using ClientBasedServerSelectFunc =
++	std::function<const ServerId&(uint64_t, uint16_t)>;
++
++      using ClientFilter = std::function<bool(const ClientId&)>;
++
++      using ServerFilter = std::function<bool(const ServerId&)>;
++
++      using ServerDataOutF =
++	std::function<void(std::ostream& out,
++			   Simulation* sim, ServerFilter,
++			   int header_w, int data_w, int data_prec)>;
++
++      using ClientDataOutF =
++	std::function<void(std::ostream& out,
++			   Simulation* sim, ClientFilter,
++			   int header_w, int data_w, int data_prec)>;
++
++      Simulation() :
++	early_time(now()),
++	prng(std::chrono::system_clock::now().time_since_epoch().count())
++      {
++	// empty
++      }
++
++      uint get_client_count() const { return client_count; }
++      uint get_server_count() const { return server_count; }
++      TC& get_client(ClientId id) { return *clients[id]; }
++      TS& get_server(ServerId id) { return *servers[id]; }
++      const ServerId& get_server_id(uint index) const {
++	return server_ids[index];
++      }
++
++
++      void add_servers(uint count,
++		       std::function<TS*(ServerId)> create_server_f) {
++	uint i = server_count;
++
++	// increment server_count before creating servers since they
++	// will start running immediately and may use the server_count
++	// value; NB: this could still be an issue if servers are
++	// added with multiple add_servers calls; consider using a
++	// separate start function after all servers (and clients?)
++	// have been added
++	server_count += count;
++
++	for (; i < server_count; ++i) {
++	  server_ids.push_back(i);
++	  servers[i] = create_server_f(i);
++	}
++
++	servers_created_time = now();
++      }
++
++
++      void add_clients(uint count,
++		       std::function<TC*(ClientId)> create_client_f) {
++	uint i = client_count;
++
++	// increment client_count before creating clients since they
++	// will start running immediately and may use the client_count
++	// value (e.g., in the server selection function); NB: this could
++	// still be an issue if clients are added with multiple
++	// add_clients calls; consider using a separate start function
++	// after all clients have been added
++	client_count += count;
++
++	for (; i < client_count; ++i) {
++	  clients[i] = create_client_f(i);
++	}
++
++	clients_created_time = now();
++      }
++
++
++      void run() {
++	assert(server_count > 0);
++	assert(client_count > 0);
++
++	std::cout << "simulation started" << std::endl;
++
++	// clients are now running; wait for all to finish
++
++	for (auto const &i : clients) {
++	  i.second->wait_until_done();
++	}
++
++	late_time = clients_finished_time = now();
++
++	std::cout << "simulation completed in " <<
++	  std::chrono::duration_cast<std::chrono::milliseconds>(clients_finished_time - servers_created_time).count() <<
++	  " millisecs" << std::endl;
++
++	has_run = true;
++      } // run
++
++
++      void display_stats(std::ostream& out,
++			 ServerDataOutF server_out_f, ClientDataOutF client_out_f,
++			 ServerFilter server_filter =
++			 [] (const ServerId&) { return true; },
++			 ClientFilter client_filter =
++			 [] (const ClientId&) { return true; },
++			 int head_w = 12, int data_w = 7, int data_prec = 2) {
++	// Writes per-client throughput and per-server data to out; run() must have completed.
++	assert(has_run);
++	// leading interval left out of the throughput buckets (currently none)
++	const std::chrono::seconds skip_amount(0);
++	// width of each throughput bucket
++	const std::chrono::seconds measure_unit(2);
++	// unit to output reports in
++	const std::chrono::seconds report_unit(1);
++
++	// NOTE: every client and server is deleted at the end of this call
++
++	TimePoint earliest_start = late_time;
++	TimePoint latest_start = early_time;
++	TimePoint earliest_finish = late_time;
++	TimePoint latest_finish = early_time;
++
++	for (auto const &c : clients) {
++	  auto start = c.second->get_op_times().front();
++	  auto end = c.second->get_op_times().back();
++
++	  if (start < earliest_start) { earliest_start = start; }
++	  if (start > latest_start) { latest_start = start; }
++	  if (end < earliest_finish) { earliest_finish = end; }
++	  if (end > latest_finish) { latest_finish = end; }
++	}
++
++	double ops_factor =
++	  std::chrono::duration_cast<std::chrono::duration<double>>(measure_unit) /
++	  std::chrono::duration_cast<std::chrono::duration<double>>(report_unit);
++
++	const auto start_edge = clients_created_time + skip_amount;
++
++	std::map<ClientId,std::vector<double>> ops_data;
++
++	for (auto const &c : clients) {
++	  auto it = c.second->get_op_times().begin();
++	  const auto end = c.second->get_op_times().end();
++	  while (it != end && *it < start_edge) { ++it; }
++
++	  for (auto time_edge = start_edge + measure_unit;
++	       time_edge <= latest_finish + measure_unit;
++	       time_edge += measure_unit) {
++	    int count = 0;
++	    for (; it != end && *it < time_edge; ++count, ++it) { /* empty */ }
++	    double ops_per_second = double(count) / ops_factor;
++	    ops_data[c.first].push_back(ops_per_second);
++	  }
++	}
++
++	out << "==== Client Data ====" << std::endl;
++
++	out << std::setw(head_w) << "client:";
++	for (auto const &c : clients) {
++	  if (!client_filter(c.first)) continue;
++	  out << " " << std::setw(data_w) << c.first;
++	}
++	out << " " << std::setw(data_w) << "total" << std::endl; // " " lines the header up with the rows' total column
++
++	{
++	  bool has_data;
++	  size_t i = 0;
++	  do {
++	    std::string line_header = "t_" + std::to_string(i) + ":";
++	    out << std::setw(head_w) << line_header;
++	    has_data = false;
++	    double total = 0.0;
++	    for (auto const &c : clients) {
++	      double data = 0.0;
++	      if (i < ops_data[c.first].size()) {
++		data = ops_data[c.first][i];
++		has_data = true;
++	      }
++	      total += data;
++
++	      if (!client_filter(c.first)) continue;
++
++	      out << " " << std::setw(data_w) << std::setprecision(data_prec) <<
++		std::fixed << data;
++	    }
++	    out << " " << std::setw(data_w) << std::setprecision(data_prec) <<
++	      std::fixed << total << std::endl;
++	    ++i;
++	  } while(has_data);
++	}
++
++	client_out_f(out, this, client_filter, head_w, data_w, data_prec);
++
++	display_client_internal_stats<std::chrono::nanoseconds>(out,
++								"nanoseconds");
++
++	out << std::endl << "==== Server Data ====" << std::endl;
++
++	out << std::setw(head_w) << "server:";
++	for (auto const &s : servers) {
++	  if (!server_filter(s.first)) continue;
++	  out << " " << std::setw(data_w) << s.first;
++	}
++	out << " " << std::setw(data_w) << "total" << std::endl;
++
++	server_out_f(out, this, server_filter, head_w, data_w, data_prec);
++
++	display_server_internal_stats<std::chrono::nanoseconds>(out,
++								"nanoseconds");
++
++	// clean up clients then servers
++
++	for (auto i = clients.begin(); i != clients.end(); ++i) {
++	  delete i->second;
++	  i->second = nullptr;
++	}
++
++	for (auto i = servers.begin(); i != servers.end(); ++i) {
++	  delete i->second;
++	  i->second = nullptr;
++	}
++      } // display_stats
++
++
++      template<typename T>
++      void display_server_internal_stats(std::ostream& out,
++					 std::string time_unit) {
++	T add_request_time(0);
++	T request_complete_time(0);
++	uint32_t add_request_count = 0;
++	uint32_t request_complete_count = 0;
++
++	for (uint i = 0; i < get_server_count(); ++i) {
++	  const auto& server = get_server(i);
++	  const auto& is = server.get_internal_stats();
++	  add_request_time +=
++	    std::chrono::duration_cast<T>(is.add_request_time);
++	  request_complete_time +=
++	    std::chrono::duration_cast<T>(is.request_complete_time);
++	  add_request_count += is.add_request_count;
++	  request_complete_count += is.request_complete_count;
++	}
++
++	double add_request_time_per_unit =
++	  double(add_request_time.count()) / add_request_count ;
++	out << "total time to add requests: " <<
++	  std::fixed << add_request_time.count() << " " << time_unit <<
++	  ";" << std::endl <<
++	  "    count: " << add_request_count << ";" << std::endl <<
++	  "    average: " << add_request_time_per_unit <<
++	  " " << time_unit << " per request/response" << std::endl;
++
++	double request_complete_time_unit =
++	  double(request_complete_time.count()) / request_complete_count ;
++	out << "total time to note requests complete: " << std::fixed <<
++	  request_complete_time.count() << " " << time_unit << ";" <<
++	  std::endl << 
++	  "    count: " << request_complete_count << ";" << std::endl <<
++	  "    average: " << request_complete_time_unit <<
++	  " " << time_unit << " per request/response" << std::endl;
++
++	out << std::endl;
++
++	assert(add_request_count == request_complete_count);
++	out << "server timing for QOS algorithm: " <<
++	  add_request_time_per_unit + request_complete_time_unit <<
++	  " " << time_unit << " per request/response" << std::endl;
++      }
++
++
++      template<typename T>
++      void display_client_internal_stats(std::ostream& out,
++					 std::string time_unit) {
++	T track_resp_time(0);
++	T get_req_params_time(0);
++	uint32_t track_resp_count = 0;
++	uint32_t get_req_params_count = 0;
++
++	for (uint i = 0; i < get_client_count(); ++i) {
++	  const auto& client = get_client(i);
++	  const auto& is = client.get_internal_stats();
++	  track_resp_time +=
++	    std::chrono::duration_cast<T>(is.track_resp_time);
++	  get_req_params_time +=
++	    std::chrono::duration_cast<T>(is.get_req_params_time);
++	  track_resp_count += is.track_resp_count;
++	  get_req_params_count += is.get_req_params_count;
++	}
++
++	double track_resp_time_unit =
++	  double(track_resp_time.count()) / track_resp_count;
++	out << "total time to track responses: " <<
++	  std::fixed << track_resp_time.count() << " " << time_unit << ";" <<
++	  std::endl <<
++	  "    count: " << track_resp_count << ";" << std::endl <<
++	  "    average: " << track_resp_time_unit << " " << time_unit <<
++	  " per request/response" << std::endl;
++
++	double get_req_params_time_unit =
++	  double(get_req_params_time.count()) / get_req_params_count;
++	out << "total time to get request parameters: " <<
++	  std::fixed << get_req_params_time.count() << " " << time_unit <<
++	  ";" << std::endl <<
++	  "    count: " << get_req_params_count << ";" << std::endl <<
++	  "    average: " << get_req_params_time_unit << " " << time_unit <<
++	  " per request/response" << std::endl;
++
++	out << std::endl;
++
++	assert(track_resp_count == get_req_params_count);
++	out << "client timing for QOS algorithm: " <<
++	  track_resp_time_unit + get_req_params_time_unit << " " <<
++	  time_unit << " per request/response" << std::endl;
++      }
++
++
++      // **** server selection functions ****
++
++
++      const ServerId& server_select_alternate(uint64_t seed,
++					      uint16_t client_idx) {
++	uint index = (client_idx + seed) % server_count;
++	return server_ids[index];
++      }
++
++
++      // returns a lambda using the range specified as servers_per (client)
++      ClientBasedServerSelectFunc
++      make_server_select_alt_range(uint16_t servers_per) {
++	return [servers_per,this](uint64_t seed, uint16_t client_idx)
++	  -> const ServerId& {
++	  double factor = double(server_count) / client_count;
++	  uint offset = seed % servers_per;
++	  uint index = (uint(0.5 + client_idx * factor) + offset) % server_count;
++	  return server_ids[index];
++	};
++      }
++
++
++      // function to choose a server randomly
++      const ServerId& server_select_random(uint64_t seed, uint16_t client_idx) {
++	uint index = prng() % server_count;
++	return server_ids[index];
++      }
++
++  
++      // returns a lambda that picks randomly among servers_per servers per client
++      ClientBasedServerSelectFunc
++      make_server_select_ran_range(uint16_t servers_per) {
++	return [servers_per,this](uint64_t seed, uint16_t client_idx)
++	  -> const ServerId& {
++	  double factor = double(server_count) / client_count;
++	  uint offset = prng() % servers_per;
++	  uint index = (uint(0.5 + client_idx * factor) + offset) % server_count;
++	  return server_ids[index];
++	};
++      }
++
++
++      // function to always choose the first server
++      const ServerId& server_select_0(uint64_t seed, uint16_t client_idx) {
++	return server_ids[0];
++      }
++    }; // class Simulation
++
++  }; // namespace qos_simulation
++}; // namespace crimson
diff --cc src/dmclock/sim/src/ssched/ssched_client.h
index 00000000000,00000000000..dcbe0771de5
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/src/ssched/ssched_client.h
@@@ -1,0 -1,0 +1,44 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#pragma once
++
++#include "ssched_recs.h"
++
++
++namespace crimson {
++  namespace simple_scheduler {
++
++    // No-op tracker for the simple scheduler; S is server identifier type
++    template<typename S>
++    class ServiceTracker {
++
++    public:
++
++      // this tracker keeps no per-server state, so there is nothing
++      // to initialize
++      ServiceTracker()
++      {
++	// empty
++      }
++
++
++      void track_resp(const S& server_id, const NullData& ignore) {
++	// empty: both arguments are ignored
++      }
++
++
++      /*
++       * Returns a default-constructed ReqParams; the server is not consulted.
++       */
++      ReqParams get_req_params(const S& server) {
++	return ReqParams();
++      } // get_req_params
++    }; // class ServiceTracker
++  } // namespace simple_scheduler
++} // namespace crimson
diff --cc src/dmclock/sim/src/ssched/ssched_recs.h
index 00000000000,00000000000..3332d5a4933
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/src/ssched/ssched_recs.h
@@@ -1,0 -1,0 +1,37 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#pragma once
++
++
++#include <ostream>
++#include <assert.h>
++
++
++namespace crimson {
++  namespace simple_scheduler {
++
++    // since we send no additional data out
++    // NOTE: Change name to RespParams? Is it used elsewhere?
++    struct NullData {
++      friend std::ostream& operator<<(std::ostream& out, const NullData& n) {
++	out << "NullData{ EMPTY }";
++	return out;
++      }
++    }; // struct NullData
++
++
++    struct ReqParams {
++      friend std::ostream& operator<<(std::ostream& out, const ReqParams& rp) {
++	out << "ReqParams{ EMPTY }";
++	return out;
++      }
++    };
++
++  }
++}
diff --cc src/dmclock/sim/src/ssched/ssched_server.h
index 00000000000,00000000000..ee4c1e6e3ef
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/src/ssched/ssched_server.h
@@@ -1,0 -1,0 +1,182 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++#pragma once
++
++#include <memory>
++#include <mutex>
++#include <deque>
++
++#include "boost/variant.hpp"
++
++#include "ssched_recs.h"
++
++#ifdef PROFILE
++#include "profile.h"
++#endif
++
++namespace crimson {
++
++  namespace simple_scheduler {
++
++    template<typename C, typename R, typename Time>
++    class SimpleQueue {
++
++    public:
++
++      using RequestRef = std::unique_ptr<R>;
++
++      // a function to see whether the server can handle another request
++      using CanHandleRequestFunc = std::function<bool(void)>;
++
++      // a function to submit a request to the server; it is passed the
++      // client, the request itself, and a NullData placeholder
++      using HandleRequestFunc =
++	std::function<void(const C&,RequestRef,NullData)>;
++
++      struct PullReq {
++	enum class Type { returning, none };
++
++	struct Retn {
++	  C           client;
++	  RequestRef  request;
++	};
++
++	Type                 type;
++	boost::variant<Retn> data;
++      };
++
++    protected:
++
++      enum class Mechanism { push, pull };
++
++      struct QRequest {
++	C          client;
++	RequestRef request;
++      };
++
++      bool finishing = false;
++      Mechanism mechanism;
++
++      CanHandleRequestFunc can_handle_f;
++      HandleRequestFunc handle_f;
++
++      mutable std::mutex queue_mtx;
++      using DataGuard = std::lock_guard<decltype(queue_mtx)>;
++
++      std::deque<QRequest> queue;
++
++#ifdef PROFILE
++    public:
++      ProfileTimer<std::chrono::nanoseconds> pull_request_timer;
++      ProfileTimer<std::chrono::nanoseconds> add_request_timer;
++      ProfileTimer<std::chrono::nanoseconds> request_complete_timer;
++    protected:
++#endif
++
++    public:
++
++      // push full constructor
++      SimpleQueue(CanHandleRequestFunc _can_handle_f,
++		  HandleRequestFunc _handle_f) :
++	mechanism(Mechanism::push),
++	can_handle_f(_can_handle_f),
++	handle_f(_handle_f)
++      {
++	// empty
++      }
++
++      SimpleQueue() :
++	mechanism(Mechanism::pull)
++      {
++	// empty
++      }
++
++      ~SimpleQueue() {
++	finishing = true;
++      }
++
++      void add_request(const R& request,
++		       const C& client_id,
++		       const ReqParams& req_params) {
++	add_request(RequestRef(new R(request)), client_id, req_params);
++      }
++
++      void add_request(RequestRef&& request,
++		       const C& client_id,
++		       const ReqParams& req_params) {
++	DataGuard g(queue_mtx);
++
++#ifdef PROFILE
++	add_request_timer.start();
++#endif
++	queue.emplace_back(QRequest{client_id, std::move(request)});
++
++	if (Mechanism::push == mechanism) {
++	  schedule_request();
++	}
++
++#ifdef PROFILE
++	add_request_timer.stop();
++#endif
++      } // add_request
++
++      void request_completed() {
++	assert(Mechanism::push == mechanism);
++	DataGuard g(queue_mtx);
++
++#ifdef PROFILE
++	request_complete_timer.start();
++#endif
++	schedule_request();
++
++#ifdef PROFILE
++	request_complete_timer.stop();
++#endif
++      } // request_completed
++
++      PullReq pull_request() { // pull mode only: returns the oldest queued request, or Type::none if empty
++	assert(Mechanism::pull == mechanism);
++	PullReq result;
++	DataGuard g(queue_mtx);
++
++#ifdef PROFILE
++	pull_request_timer.start();
++#endif
++
++	if (queue.empty()) {
++	  result.type = PullReq::Type::none;
++	} else {
++	  auto& front = queue.front(); // by reference: QRequest holds a unique_ptr and cannot be copied
++	  result.type = PullReq::Type::returning;
++	  result.data =
++	    typename PullReq::Retn{front.client, std::move(front.request)};
++	  queue.pop_front(); // std::deque offers pop_front(), not pop()
++	}
++
++#ifdef PROFILE
++	pull_request_timer.stop();
++#endif
++
++	return result;
++      }
++
++    protected:
++
++      // queue_mtx should be held when called; should only be called
++      // when mechanism is push
++      void schedule_request() {
++	if (!queue.empty() && can_handle_f()) {
++	  auto& front = queue.front();
++	  static NullData null_data;
++	  handle_f(front.client, std::move(front.request), null_data);
++	  queue.pop_front();
++	}
++      }
++    };
++  };
++};
diff --cc src/dmclock/sim/src/str_list.cc
index 00000000000,00000000000..22109e00840
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/src/str_list.cc
@@@ -1,0 -1,0 +1,106 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++/*
++ * Ceph - scalable distributed file system
++ *
++ * Copyright (C) 2009-2010 Dreamhost
++ *
++ * This is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License version 2.1, as published by the Free Software
++ * Foundation.  See file COPYING.
++ *
++ */
++
++#include "str_list.h"
++
++using std::string;
++using std::vector;
++using std::set;
++using std::list;
++
++static bool get_next_token(const string &s, size_t& pos, const char *delims, string& token)
++{
++  // Copies the next delims-separated token of s at or after pos into token and moves pos past it.
++  const size_t start = s.find_first_not_of(delims, pos);
++  // npos is compared as size_t; narrowing to int would misread offsets beyond INT_MAX
++  if (start == string::npos){
++    pos = s.size();
++    return false;
++  }
++
++  size_t end = s.find_first_of(delims, start);
++  if (end != string::npos)
++    pos = end + 1;
++  else {
++    pos = end = s.size();
++  }
++
++  token = s.substr(start, end - start);
++  return true;
++}
++
++void get_str_list(const string& str, const char *delims, list<string>& str_list)
++{
++  size_t pos = 0;
++  string token;
++
++  str_list.clear();
++
++  while (pos < str.size()) {
++    if (get_next_token(str, pos, delims, token)) {
++      if (token.size() > 0) {
++        str_list.push_back(token);
++      }
++    }
++  }
++}
++
++void get_str_list(const string& str, list<string>& str_list)
++{
++  const char *delims = ";,= \t";
++  return get_str_list(str, delims, str_list);
++}
++
++void get_str_vec(const string& str, const char *delims, vector<string>& str_vec)
++{
++  size_t pos = 0;
++  string token;
++  str_vec.clear();
++
++  while (pos < str.size()) {
++    if (get_next_token(str, pos, delims, token)) {
++      if (token.size() > 0) {
++        str_vec.push_back(token);
++      }
++    }
++  }
++}
++
++void get_str_vec(const string& str, vector<string>& str_vec)
++{
++  const char *delims = ";,= \t";
++  return get_str_vec(str, delims, str_vec);
++}
++
++void get_str_set(const string& str, const char *delims, set<string>& str_set)
++{
++  size_t pos = 0;
++  string token;
++
++  str_set.clear();
++
++  while (pos < str.size()) {
++    if (get_next_token(str, pos, delims, token)) {
++      if (token.size() > 0) {
++        str_set.insert(token);
++      }
++    }
++  }
++}
++
++void get_str_set(const string& str, set<string>& str_set)
++{
++  const char *delims = ";,= \t";
++  return get_str_set(str, delims, str_set);
++}
diff --cc src/dmclock/sim/src/str_list.h
index 00000000000,00000000000..4ba0cadd960
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/src/str_list.h
@@@ -1,0 -1,0 +1,94 @@@
++#ifndef CEPH_STRLIST_H
++#define CEPH_STRLIST_H
++
++#include <list>
++#include <set>
++#include <sstream>
++#include <string>
++#include <vector>
++
++/**
++ * Split **str** into a list of strings, using the ";,= \t" delimiters and output the result in **str_list**.
++ * 
++ * @param [in] str String to split and save as list
++ * @param [out] str_list List modified containing str after it has been split
++**/
++extern void get_str_list(const std::string& str,
++			 std::list<std::string>& str_list);
++
++/**
++ * Split **str** into a list of strings, using the **delims** delimiters and output the result in **str_list**.
++ * 
++ * @param [in] str String to split and save as list
++ * @param [in] delims characters used to split **str**
++ * @param [out] str_list List modified containing str after it has been split
++**/
++extern void get_str_list(const std::string& str,
++                         const char *delims,
++			 std::list<std::string>& str_list);
++
++/**
++ * Split **str** into a list of strings, using the ";,= \t" delimiters and output the result in **str_vec**.
++ * 
++ * @param [in] str String to split and save as Vector
++ * @param [out] str_vec Vector modified containing str after it has been split
++**/
++extern void get_str_vec(const std::string& str,
++			 std::vector<std::string>& str_vec);
++
++/**
++ * Split **str** into a list of strings, using the **delims** delimiters and output the result in **str_vec**.
++ * 
++ * @param [in] str String to split and save as Vector
++ * @param [in] delims characters used to split **str**
++ * @param [out] str_vec Vector modified containing str after it has been split
++**/
++extern void get_str_vec(const std::string& str,
++                         const char *delims,
++			 std::vector<std::string>& str_vec);
++
++/**
++ * Split **str** into a list of strings, using the ";,= \t" delimiters and output the result in **str_list**.
++ * 
++ * @param [in] str String to split and save as Set
++ * @param [out] str_list Set modified containing str after it has been split
++**/
++extern void get_str_set(const std::string& str,
++			std::set<std::string>& str_list);
++
++/**
++ * Split **str** into a list of strings, using the **delims** delimiters and output the result in **str_list**.
++ * 
++ * @param [in] str String to split and save as Set
++ * @param [in] delims characters used to split **str**
++ * @param [out] str_list Set modified containing str after it has been split
++**/
++extern void get_str_set(const std::string& str,
++                        const char *delims,
++			std::set<std::string>& str_list);
++
++/**
++ * Return a String containing the vector **v** joined with **sep**
++ * 
++ * If **v** is empty, the function returns an empty string
++ * For each element in **v**,
++ * it will concatenate this element and **sep** with result
++ * 
++ * @param [in] v Vector to join as a String
++ * @param [in] sep String used to join each element from **v**
++ * @return empty string if **v** is empty or concatenated string
++**/
++inline std::string str_join(const std::vector<std::string>& v, std::string sep)
++{
++  std::string joined;  // stays empty when v has no elements
++  bool first = true;
++  for (const std::string& piece : v) {
++    if (!first)
++      joined += sep;  // separator goes between elements only
++    joined += piece;
++    first = false;
++  }
++  return joined;
++}
++
++#endif
diff --cc src/dmclock/sim/src/test_dmclock.cc
index 00000000000,00000000000..8e7aa4ab219
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/src/test_dmclock.cc
@@@ -1,0 -1,0 +1,40 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#include "dmclock_recs.h"
++#include "dmclock_server.h"
++#include "dmclock_client.h"
++
++#include "sim_recs.h"
++#include "sim_server.h"
++#include "sim_client.h"
++
++#include "test_dmclock.h"
++
++
++namespace test = crimson::test_dmc;
++
++
++// Server-side accumulator: records one more op in the tally that
++// matches its phase. Anything other than the reservation phase is
++// counted as a proportion op.
++void test::dmc_server_accumulate_f(test::DmcAccum& a,
++				   const test::dmc::PhaseType& phase) {
++  const bool by_reservation =
++    (phase == test::dmc::PhaseType::reservation);
++  auto& tally = by_reservation ? a.reservation_count : a.proportion_count;
++  ++tally;
++}
++
++
++// Client-side accumulator: same bookkeeping as the server-side one --
++// reservation-phase responses bump reservation_count, every other
++// response bumps proportion_count.
++void test::dmc_client_accumulate_f(test::DmcAccum& a,
++				   const test::dmc::PhaseType& phase) {
++  if (phase != test::dmc::PhaseType::reservation) {
++    a.proportion_count += 1;
++    return;
++  }
++  a.reservation_count += 1;
++}
diff --cc src/dmclock/sim/src/test_dmclock.h
index 00000000000,00000000000..7f1e55439ed
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/src/test_dmclock.h
@@@ -1,0 -1,0 +1,56 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#include "dmclock_recs.h"
++#include "dmclock_server.h"
++#include "dmclock_client.h"
++
++#include "sim_recs.h"
++#include "sim_server.h"
++#include "sim_client.h"
++
++#include "simulate.h"
++
++
++namespace crimson {
++  namespace test_dmc {
++    // types and accumulator hooks shared by the dmclock simulation sources
++    namespace dmc = crimson::dmclock;
++    namespace sim = crimson::qos_simulation;
++    // tally of ops kept per simulated server and per simulated client
++    struct DmcAccum {
++      uint64_t reservation_count = 0; // ops whose phase was PhaseType::reservation
++      uint64_t proportion_count = 0;  // ops with any other phase
++    };
++    // the dmclock queue under test, keyed by ClientId and holding TestRequests
++    using DmcQueue = dmc::PushPriorityQueue<ClientId,sim::TestRequest>;
++    // simulated server built around DmcQueue, accumulating into DmcAccum
++    using DmcServer = sim::SimulatedServer<DmcQueue,
++					   dmc::ReqParams,
++					   dmc::PhaseType,
++					   DmcAccum>;
++    // simulated client that tracks servers with dmc::ServiceTracker
++    using DmcClient = sim::SimulatedClient<dmc::ServiceTracker<ServerId>,
++					   dmc::ReqParams,
++					   dmc::PhaseType,
++					   DmcAccum>;
++    // factory signature: returns a raw DmcQueue* built from the two callbacks
++    using CreateQueueF = std::function<DmcQueue*(DmcQueue::CanHandleRequestFunc,
++						 DmcQueue::HandleRequestFunc)>;
++    // the whole simulation: servers and clients indexed by ServerId / ClientId
++    using MySim = sim::Simulation<ServerId,ClientId,DmcServer,DmcClient>;
++    // callable a client uses to submit a request to a server
++    using SubmitFunc = DmcClient::SubmitFunc;
++    // adds one op to the DmcAccum field selected by phase (server side)
++    extern void dmc_server_accumulate_f(DmcAccum& a,
++					const dmc::PhaseType& phase);
++    // adds one op to the DmcAccum field selected by phase (client side)
++    extern void dmc_client_accumulate_f(DmcAccum& a,
++					const dmc::PhaseType& phase);
++  } // namespace test_dmc
++} // namespace crimson
diff --cc src/dmclock/sim/src/test_dmclock_main.cc
index 00000000000,00000000000..c3ba1e18fbd
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/src/test_dmclock_main.cc
@@@ -1,0 -1,0 +1,322 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#include "test_dmclock.h"
++#include "config.h"
++
++#ifdef PROFILE
++#include "profile.h"
++#endif
++
++
++namespace dmc = crimson::dmclock;
++namespace test = crimson::test_dmc;
++namespace sim = crimson::qos_simulation;
++
++using namespace std::placeholders;
++
++
++namespace crimson {
++    namespace test_dmc { // forward declarations; both functions are defined after main() below
++        void server_data(std::ostream& out, // prints the per-server res_ops/prop_ops rows
++                         test::MySim* sim,
++                         test::MySim::ServerFilter server_disp_filter,
++                         int head_w, int data_w, int data_prec);
++        // prints the per-client res_ops/prop_ops rows
++        void client_data(std::ostream& out,
++                         test::MySim* sim,
++                         test::MySim::ClientFilter client_disp_filter,
++                         int head_w, int data_w, int data_prec);
++    } // namespace test_dmc
++} // namespace crimson
++
++
++int main(int argc, char* argv[]) { // dmclock simulation: configure, build servers/clients, run, report
++    std::vector<const char*> args;
++    for (int i = 1; i < argc; ++i) { // argv[0] (the program name) is skipped
++      args.push_back(argv[i]);
++    }
++    // NOTE(review): presumably this pulls the config-file option out of args into conf_file_list -- confirm in config.h
++    std::string conf_file_list;
++    sim::ceph_argparse_early_args(args, &conf_file_list);
++
++    sim::sim_config_t g_conf;
++    std::vector<sim::cli_group_t> &cli_group = g_conf.cli_group; // references into g_conf, so the
++    std::vector<sim::srv_group_t> &srv_group = g_conf.srv_group; // push_backs below modify g_conf
++
++    if (!conf_file_list.empty()) {
++      int ret;
++      ret = sim::parse_config_file(conf_file_list, g_conf);
++      if (ret) {
++	// a non-zero result is treated as a parse error.
++	// NOTE(review): _exit() terminates without flushing stdio buffers or
++	// running destructors, and is declared in <unistd.h>, which this file
++	// does not include directly
++	_exit(1);
++      }
++    } else {
++      // default simulation parameter: two client groups, one server group entry
++      g_conf.client_groups = 2;
++      // NOTE(review): server_groups is not set here; this relies on sim_config_t's default matching the single entry pushed below -- TODO confirm
++      sim::srv_group_t st;
++      srv_group.push_back(st);
++      // NOTE(review): the two arguments look like (client count, wait in seconds) -- confirm against cli_group_t's constructor
++      sim::cli_group_t ct1(99, 0);
++      cli_group.push_back(ct1);
++
++      sim::cli_group_t ct2(1, 10);
++      cli_group.push_back(ct2);
++    }
++    // snapshot the settings used below; the group counts bound every loop over cli_group / srv_group
++    const uint server_groups = g_conf.server_groups;
++    const uint client_groups = g_conf.client_groups;
++    const bool server_random_selection = g_conf.server_random_selection;
++    const bool server_soft_limit = g_conf.server_soft_limit;
++    uint server_total_count = 0;
++    uint client_total_count = 0;
++    // total number of clients / servers across all groups
++    for (uint i = 0; i < client_groups; ++i) {
++      client_total_count += cli_group[i].client_count;
++    }
++
++    for (uint i = 0; i < server_groups; ++i) {
++      server_total_count += srv_group[i].server_count;
++    }
++    // one ClientInfo (reservation, weight, limit) per client group
++    std::vector<test::dmc::ClientInfo> client_info;
++    for (uint i = 0; i < client_groups; ++i) {
++      client_info.push_back(test::dmc::ClientInfo 
++			  { cli_group[i].client_reservation,
++			    cli_group[i].client_weight,
++			    cli_group[i].client_limit } );
++    }
++    // maps a client id to the index of its group by walking the cumulative client counts; yields client_groups if the id is past the last group
++    auto ret_client_group_f = [&](const ClientId& c) -> uint {
++      uint group_max = 0;
++      uint i = 0;
++      for (; i < client_groups; ++i) {
++	group_max += cli_group[i].client_count;
++	if (c < group_max) {
++	  break;
++	}
++      }
++      return i;
++    };
++    // same mapping for servers; yields server_groups if the id is past the last group
++    auto ret_server_group_f = [&](const ServerId& s) -> uint {
++      uint group_max = 0;
++      uint i = 0;
++      for (; i < server_groups; ++i) {
++	group_max += srv_group[i].server_count;
++	if (s < group_max) {
++	  break;
++	}
++      }
++      return i;
++    };
++    // captures by copy, so it carries its own client_info vector and its own copy of the group lambda
++    auto client_info_f = [=](const ClientId& c) -> test::dmc::ClientInfo {
++      return client_info[ret_client_group_f(c)];
++    };
++    // display filters: only ids below 3 or within 3 of the total count are shown
++    auto client_disp_filter = [=] (const ClientId& i) -> bool {
++        return i < 3 || i >= (client_total_count - 3);
++    };
++
++    auto server_disp_filter = [=] (const ServerId& i) -> bool {
++        return i < 3 || i >= (server_total_count - 3);
++    };
++
++    // left unassigned here and captured by reference below, so the lambdas see the pointer assigned by the later 'new'
++    test::MySim *simulation;
++  
++
++    // lambda to post a request to the identified server; called by client
++    test::SubmitFunc server_post_f =
++        [&simulation](const ServerId& server,
++                      const sim::TestRequest& request,
++                      const ClientId& client_id,
++                      const test::dmc::ReqParams& req_params) {
++        test::DmcServer& s = simulation->get_server(server);
++        s.post(request, client_id, req_params);
++    };
++    // per-group instruction lists: a req_op alone when the group's wait is zero, otherwise a wait_op followed by the req_op
++    std::vector<std::vector<sim::CliInst>> cli_inst;
++    for (uint i = 0; i < client_groups; ++i) {
++      if (cli_group[i].client_wait == std::chrono::seconds(0)) {
++	cli_inst.push_back(
++	    { { sim::req_op, 
++	        (uint32_t)cli_group[i].client_total_ops,
++	        (double)cli_group[i].client_iops_goal, 
++	        (uint16_t)cli_group[i].client_outstanding_ops } } );
++      } else {
++	cli_inst.push_back(
++	    { { sim::wait_op, cli_group[i].client_wait },
++	      { sim::req_op, 
++	        (uint32_t)cli_group[i].client_total_ops,
++		(double)cli_group[i].client_iops_goal, 
++		(uint16_t)cli_group[i].client_outstanding_ops } } );
++      }
++    }
++    // NOTE(review): this allocation is never deleted in main(); confirm whether that is intentional
++    simulation = new test::MySim();
++    // lambda to hand a server's response to the addressed client; called by server
++    test::DmcServer::ClientRespFunc client_response_f =
++        [&simulation](ClientId client_id,
++                      const sim::TestResponse& resp,
++                      const ServerId& server_id,
++                      const dmc::PhaseType& phase) {
++        simulation->get_client(client_id).receive_response(resp,
++                                                           server_id,
++                                                           phase);
++    };
++    // factory returning a raw, heap-allocated queue for a server
++    test::CreateQueueF create_queue_f =
++        [&](test::DmcQueue::CanHandleRequestFunc can_f,
++            test::DmcQueue::HandleRequestFunc handle_f) -> test::DmcQueue* {
++        return new test::DmcQueue(client_info_f, can_f, handle_f, server_soft_limit);
++    };
++    // factory for a server, configured from the server group its id falls in
++    auto create_server_f = [&](ServerId id) -> test::DmcServer* {
++      uint i = ret_server_group_f(id);
++      return new test::DmcServer(id,
++                                 srv_group[i].server_iops,
++				 srv_group[i].server_threads,
++				 client_response_f,
++				 test::dmc_server_accumulate_f,
++				 create_queue_f);
++    };
++    // factory for a client; the server-selection function is chosen by server_random_selection and bound to this client's id
++    auto create_client_f = [&](ClientId id) -> test::DmcClient* {
++      uint i = ret_client_group_f(id);
++      test::MySim::ClientBasedServerSelectFunc server_select_f;
++      uint client_server_select_range = cli_group[i].client_server_select_range;
++      if (!server_random_selection) {
++	server_select_f = simulation->make_server_select_alt_range(client_server_select_range);
++      } else {
++	server_select_f = simulation->make_server_select_ran_range(client_server_select_range);
++      }
++      return new test::DmcClient(id,
++				 server_post_f,
++				 std::bind(server_select_f, _1, id),
++				 test::dmc_client_accumulate_f,
++				 cli_inst[i]);
++    };
++    // echo the effective configuration in sections: [global], [client.N], [server.N]
++#if 1
++    std::cout << "[global]" << std::endl << g_conf << std::endl;
++    for (uint i = 0; i < client_groups; ++i) {
++      std::cout << std::endl << "[client." << i << "]" << std::endl;
++      std::cout << cli_group[i] << std::endl;
++    }
++    for (uint i = 0; i < server_groups; ++i) {
++      std::cout << std::endl << "[server." << i << "]" << std::endl;
++      std::cout << srv_group[i] << std::endl;
++    }
++    std::cout << std::endl;
++#endif
++    // servers are added before clients
++    simulation->add_servers(server_total_count, create_server_f);
++    simulation->add_clients(client_total_count, create_client_f);
++
++    simulation->run();
++    simulation->display_stats(std::cout,
++                              &test::server_data, &test::client_data,
++                              server_disp_filter, client_disp_filter);
++} // main
++
++
++/*
++ * Writes two rows to out: "res_ops:" and "prop_ops:", i.e. how many
++ * ops each client saw completed by reservation and by proportion.
++ *
++ * Every client contributes to the row total printed in the last
++ * column, but a per-client column is printed only for clients accepted
++ * by client_disp_filter. head_w is the width of the row label, data_w
++ * the width of each column; data_prec is applied to the stream before
++ * the total is printed.
++ */
++void test::client_data(std::ostream& out,
++		 test::MySim* sim,
++		 test::MySim::ClientFilter client_disp_filter,
++		 int head_w, int data_w, int data_prec) {
++    // The accumulator fields are uint64_t, so the totals are kept at
++    // that width; summing them into an int would narrow the value and
++    // could overflow.
++    uint64_t total_r = 0;
++    out << std::setw(head_w) << "res_ops:";
++    for (uint i = 0; i < sim->get_client_count(); ++i) {
++        const auto& client = sim->get_client(i);
++        const auto r = client.get_accumulator().reservation_count;
++        total_r += r;
++        if (!client_disp_filter(i)) continue;
++        out << " " << std::setw(data_w) << r;
++    }
++    out << " " << std::setw(data_w) << std::setprecision(data_prec) <<
++        std::fixed << total_r << std::endl;
++
++    uint64_t total_p = 0;
++    out << std::setw(head_w) << "prop_ops:";
++    for (uint i = 0; i < sim->get_client_count(); ++i) {
++        const auto& client = sim->get_client(i);
++        const auto p = client.get_accumulator().proportion_count;
++        total_p += p;
++        if (!client_disp_filter(i)) continue;
++        out << " " << std::setw(data_w) << p;
++    }
++    out << " " << std::setw(data_w) << std::setprecision(data_prec) <<
++        std::fixed << total_p << std::endl;
++}
++
++
++/*
++ * Writes the per-server "res_ops:" and "prop_ops:" rows to out,
++ * followed by the branching factor of the first server's priority
++ * queue and, when built with PROFILE, the combined timer statistics of
++ * all servers.
++ *
++ * Every server contributes to the row total printed in the last
++ * column, but a per-server column is printed only for servers accepted
++ * by server_disp_filter. head_w is the width of the row label, data_w
++ * the width of each column; data_prec is applied to the stream before
++ * the total is printed.
++ */
++void test::server_data(std::ostream& out,
++		 test::MySim* sim,
++		 test::MySim::ServerFilter server_disp_filter,
++		 int head_w, int data_w, int data_prec) {
++    // The accumulator fields are uint64_t, so the totals are kept at
++    // that width; summing them into an int would narrow the value and
++    // could overflow.
++    out << std::setw(head_w) << "res_ops:";
++    uint64_t total_r = 0;
++    for (uint i = 0; i < sim->get_server_count(); ++i) {
++        const auto& server = sim->get_server(i);
++        const auto rc = server.get_accumulator().reservation_count;
++        total_r += rc;
++        if (!server_disp_filter(i)) continue;
++        out << " " << std::setw(data_w) << rc;
++    }
++    out << " " << std::setw(data_w) << std::setprecision(data_prec) <<
++        std::fixed << total_r << std::endl;
++
++    out << std::setw(head_w) << "prop_ops:";
++    uint64_t total_p = 0;
++    for (uint i = 0; i < sim->get_server_count(); ++i) {
++        const auto& server = sim->get_server(i);
++        const auto pc = server.get_accumulator().proportion_count;
++        total_p += pc;
++        if (!server_disp_filter(i)) continue;
++        out << " " << std::setw(data_w) << pc;
++    }
++    out << " " << std::setw(data_w) << std::setprecision(data_prec) <<
++        std::fixed << total_p << std::endl;
++
++    // the branching factor is read from server 0, so only ask for it
++    // when at least one server exists
++    if (sim->get_server_count() > 0) {
++        const auto& q = sim->get_server(0).get_priority_queue();
++        out << std::endl <<
++	    " k-way heap: " << q.get_heap_branching_factor() << std::endl
++	    << std::endl;
++    }
++
++#ifdef PROFILE
++    // fold every server's two timers into one combined profile each
++    crimson::ProfileCombiner<std::chrono::nanoseconds> art_combiner;
++    crimson::ProfileCombiner<std::chrono::nanoseconds> rct_combiner;
++    for (uint i = 0; i < sim->get_server_count(); ++i) {
++      const auto& q = sim->get_server(i).get_priority_queue();
++      const auto& art = q.add_request_timer;
++      art_combiner.combine(art);
++      const auto& rct = q.request_complete_timer;
++      rct_combiner.combine(rct);
++    }
++    out << "Server add_request_timer: count:" << art_combiner.get_count() <<
++      ", mean:" << art_combiner.get_mean() <<
++      ", std_dev:" << art_combiner.get_std_dev() <<
++      ", low:" << art_combiner.get_low() <<
++      ", high:" << art_combiner.get_high() << std::endl;
++    out << "Server request_complete_timer: count:" << rct_combiner.get_count() <<
++      ", mean:" << rct_combiner.get_mean() <<
++      ", std_dev:" << rct_combiner.get_std_dev() <<
++      ", low:" << rct_combiner.get_low() <<
++      ", high:" << rct_combiner.get_high() << std::endl;
++    out << "Server combined mean: " <<
++      (art_combiner.get_mean() + rct_combiner.get_mean()) <<
++      std::endl;
++#endif
++}
diff --cc src/dmclock/sim/src/test_ssched.cc
index 00000000000,00000000000..e28b015cbdb
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/src/test_ssched.cc
@@@ -1,0 -1,0 +1,33 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#include "ssched_recs.h"
++#include "ssched_server.h"
++#include "ssched_client.h"
++
++#include "sim_recs.h"
++#include "sim_server.h"
++#include "sim_client.h"
++
++#include "test_ssched.h"
++
++
++namespace test = crimson::test_simple_scheduler;
++namespace ssched = crimson::simple_scheduler;
++
++
++// Server-side accumulator: counts one more request. The NullData
++// argument is not examined.
++void test::simple_server_accumulate_f(test::SimpleAccum& a,
++				      const ssched::NullData& add_info) {
++  a.request_count += 1;
++}
++
++
++void test::simple_client_accumulate_f(test::SimpleAccum& a,
++				      const ssched::NullData& ignore) {
++  // intentionally empty: nothing is accumulated on the client side, so
++  // neither parameter is used
++}
diff --cc src/dmclock/sim/src/test_ssched.h
index 00000000000,00000000000..96ac33ff376
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/src/test_ssched.h
@@@ -1,0 -1,0 +1,57 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#include "ssched_server.h"
++#include "ssched_client.h"
++
++#include "sim_recs.h"
++#include "sim_server.h"
++#include "sim_client.h"
++
++#include "simulate.h"
++
++
++namespace crimson {
++  namespace test_simple_scheduler {
++    // types and accumulator hooks shared by the simple-scheduler simulation sources
++    namespace ssched = crimson::simple_scheduler;
++    namespace sim = crimson::qos_simulation;
++    // time type handed to ssched::SimpleQueue as its third template argument
++    using Time = double;
++    // per-server / per-client tally; only the server-side hook increments it
++    struct SimpleAccum {
++      uint32_t request_count = 0;
++    };
++    // the queue under test, keyed by ClientId and holding TestRequests
++    using SimpleQueue = ssched::SimpleQueue<ClientId,sim::TestRequest,Time>;
++    // simulated server and client built around the simple scheduler
++    using SimpleServer = sim::SimulatedServer<SimpleQueue,
++					      ssched::ReqParams,
++					      ssched::NullData,
++					      SimpleAccum>;
++    using SimpleClient = sim::SimulatedClient<ssched::ServiceTracker<ServerId>,
++					      ssched::ReqParams,
++					      ssched::NullData,
++					      SimpleAccum>;
++    // factory signature: returns a raw SimpleQueue* built from the two callbacks
++    using CreateQueueF =
++      std::function<SimpleQueue*(SimpleQueue::CanHandleRequestFunc,
++				 SimpleQueue::HandleRequestFunc)>;
++
++    // the whole simulation: servers and clients indexed by ServerId / ClientId
++    using MySim = sim::Simulation<ServerId,ClientId,SimpleServer,SimpleClient>;
++    // callable a client uses to submit a request to a server
++    using SubmitFunc = SimpleClient::SubmitFunc;
++    // increments a.request_count; add_info is not used
++    extern void simple_server_accumulate_f(SimpleAccum& a,
++					   const ssched::NullData& add_info);
++    // does nothing; both arguments are ignored
++    extern void simple_client_accumulate_f(SimpleAccum& a,
++					   const ssched::NullData& ignore);
++  } // namespace test_simple_scheduler
++} // namespace crimson
diff --cc src/dmclock/sim/src/test_ssched_main.cc
index 00000000000,00000000000..6df20dc5f89
new file mode 100644
--- /dev/null
+++ b/src/dmclock/sim/src/test_ssched_main.cc
@@@ -1,0 -1,0 +1,187 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#include "test_ssched.h"
++
++
++#ifdef PROFILE
++#include "profile.h"
++#endif
++
++
++namespace test = crimson::test_simple_scheduler;
++namespace ssched = crimson::simple_scheduler;
++namespace sim = crimson::qos_simulation;
++
++using namespace std::placeholders;
++
++
++namespace crimson {
++  namespace test_simple_scheduler { // forward declarations; both functions are defined after main() below
++    void client_data(std::ostream& out, // prints nothing in this simulation
++		     test::MySim* sim,
++		     test::MySim::ClientFilter client_disp_filter,
++		     int head_w, int data_w, int data_prec);
++    // prints the per-server "requests:" row
++    void server_data(std::ostream& out,
++		     test::MySim* sim,
++		     test::MySim::ServerFilter server_disp_filter,
++		     int head_w, int data_w, int data_prec);
++  } // namespace test_simple_scheduler
++} // namespace crimson
++    
++
++int main(int argc, char* argv[]) { // simple-scheduler simulation with hard-coded parameters; argc/argv are not used
++  // server params
++
++  const uint server_count = 100;
++  const uint server_iops = 40;
++  const uint server_threads = 1;
++
++  // client params
++
++  const uint client_total_ops = 1000;
++  const uint client_count = 100;
++  const uint client_server_select_range = 10;
++  const uint client_wait_count = 1; // how many clients (the highest ids) get the 'wait' instruction list
++  const uint client_iops_goal = 50;
++  const uint client_outstanding_ops = 100;
++  const std::chrono::seconds client_wait(10); // length of the wait_op in the 'wait' instruction list
++  // display filters: only ids below 3 or within 3 of the total count are shown
++  auto client_disp_filter = [=] (const ClientId& i) -> bool {
++    return i < 3 || i >= (client_count - 3);
++  };
++
++  auto server_disp_filter = [=] (const ServerId& i) -> bool {
++    return i < 3 || i >= (server_count - 3);
++  };
++
++  // left unassigned here and captured by reference below, so the lambdas see the pointer assigned by the later 'new'
++  test::MySim *simulation;
++
++  // lambda to post a request to the identified server; called by client
++  test::SubmitFunc server_post_f =
++    [&simulation](const ServerId& server_id,
++		  const sim::TestRequest& request,
++		  const ClientId& client_id,
++		  const ssched::ReqParams& req_params) {
++    auto& server = simulation->get_server(server_id);
++    server.post(request, client_id, req_params);
++  };
++  // the two instruction lists: a req_op alone, or a wait_op followed by the same req_op
++  static std::vector<sim::CliInst> no_wait =
++    { { sim::req_op, client_total_ops, client_iops_goal, client_outstanding_ops } };
++  static std::vector<sim::CliInst> wait =
++    { { sim::wait_op, client_wait },
++      { sim::req_op, client_total_ops, client_iops_goal, client_outstanding_ops } };
++  // NOTE(review): this allocation is never deleted in main(); confirm whether that is intentional
++  simulation = new test::MySim();
++  // compile-time choice of server-selection strategy; only the first branch is compiled
++#if 1
++  test::MySim::ClientBasedServerSelectFunc server_select_f =
++    simulation->make_server_select_alt_range(client_server_select_range);
++#elif 0
++  test::MySim::ClientBasedServerSelectFunc server_select_f =
++    std::bind(&test::MySim::server_select_random, simulation, _1, _2);
++#else
++  test::MySim::ClientBasedServerSelectFunc server_select_f =
++    std::bind(&test::MySim::server_select_0, simulation, _1, _2);
++#endif
++  // lambda to hand a server's response to the addressed client; called by server
++  test::SimpleServer::ClientRespFunc client_response_f =
++    [&simulation](ClientId client_id,
++		  const sim::TestResponse& resp,
++		  const ServerId& server_id,
++		  const ssched::NullData& resp_params) {
++    simulation->get_client(client_id).receive_response(resp,
++						       server_id,
++						       resp_params);
++  };
++  // factory returning a raw, heap-allocated queue for a server
++  test::CreateQueueF create_queue_f =
++    [&](test::SimpleQueue::CanHandleRequestFunc can_f,
++	test::SimpleQueue::HandleRequestFunc handle_f) -> test::SimpleQueue* {
++    return new test::SimpleQueue(can_f, handle_f);
++  };
++  // factory for a server; every server gets the same iops and thread settings
++  auto create_server_f = [&](ServerId id) -> test::SimpleServer* {
++    return new test::SimpleServer(id,
++				  server_iops, server_threads,
++				  client_response_f,
++				  test::simple_server_accumulate_f,
++				  create_queue_f);
++  };
++  // factory for a client; ids at or above client_count - client_wait_count get the 'wait' list, all others 'no_wait'
++  auto create_client_f = [&](ClientId id) -> test::SimpleClient* {
++    return new test::SimpleClient(id,
++				  server_post_f,
++				  std::bind(server_select_f, _1, id),
++				  test::simple_client_accumulate_f,
++				  id < (client_count - client_wait_count)
++				  ? no_wait : wait);
++  };
++  // servers are added before clients
++  simulation->add_servers(server_count, create_server_f);
++  simulation->add_clients(client_count, create_client_f);
++
++  simulation->run();
++  simulation->display_stats(std::cout,
++			    &test::server_data, &test::client_data,
++			    server_disp_filter, client_disp_filter);
++} // main
++
++
++void test::client_data(std::ostream& out,
++		       test::MySim* sim,
++		       test::MySim::ClientFilter client_disp_filter,
++		       int head_w, int data_w, int data_prec) {
++  // intentionally empty: this simulation prints no per-client rows, so
++  // none of the parameters is used
++}
++
++
++/*
++ * Writes the per-server "requests:" row to out and, when built with
++ * PROFILE, the combined timer statistics of all servers.
++ *
++ * Every server contributes to the total printed in the last column,
++ * but a per-server column is printed only for servers accepted by
++ * server_disp_filter. head_w is the width of the row label, data_w the
++ * width of each column; data_prec is applied to the stream before the
++ * total is printed.
++ */
++void test::server_data(std::ostream& out,
++		       test::MySim* sim,
++		       test::MySim::ServerFilter server_disp_filter,
++		       int head_w, int data_w, int data_prec) {
++  out << std::setw(head_w) << "requests:";
++  // Each request_count is a uint32_t; the sum over all servers is kept
++  // in 64 bits so it can neither wrap nor overflow a signed int.
++  uint64_t total_req = 0;
++  for (uint i = 0; i < sim->get_server_count(); ++i) {
++    const auto& server = sim->get_server(i);
++    const auto req_count = server.get_accumulator().request_count;
++    total_req += req_count;
++    if (!server_disp_filter(i)) continue;
++    out << std::setw(data_w) << req_count;
++  }
++  out << std::setw(data_w) << std::setprecision(data_prec) <<
++    std::fixed << total_req << std::endl;
++
++#ifdef PROFILE
++    // fold every server's two timers into one combined profile each
++    crimson::ProfileCombiner<std::chrono::nanoseconds> art_combiner;
++    crimson::ProfileCombiner<std::chrono::nanoseconds> rct_combiner;
++    for (uint i = 0; i < sim->get_server_count(); ++i) {
++      const auto& q = sim->get_server(i).get_priority_queue();
++      const auto& art = q.add_request_timer;
++      art_combiner.combine(art);
++      const auto& rct = q.request_complete_timer;
++      rct_combiner.combine(rct);
++    }
++    out << "Server add_request_timer: count:" << art_combiner.get_count() <<
++      ", mean:" << art_combiner.get_mean() <<
++      ", std_dev:" << art_combiner.get_std_dev() <<
++      ", low:" << art_combiner.get_low() <<
++      ", high:" << art_combiner.get_high() << std::endl;
++    out << "Server request_complete_timer: count:" << rct_combiner.get_count() <<
++      ", mean:" << rct_combiner.get_mean() <<
++      ", std_dev:" << rct_combiner.get_std_dev() <<
++      ", low:" << rct_combiner.get_low() <<
++      ", high:" << rct_combiner.get_high() << std::endl;
++    out << "Server combined mean: " <<
++      (art_combiner.get_mean() + rct_combiner.get_mean()) <<
++      std::endl;
++#endif
++}
diff --cc src/dmclock/src/CMakeLists.txt
index 00000000000,00000000000..691e64cce43
new file mode 100644
--- /dev/null
+++ b/src/dmclock/src/CMakeLists.txt
@@@ -1,0 -1,0 +1,19 @@@
++include_directories(../support/src) # sibling support sources; run_every.cc from there is compiled into this library
++include_directories(${BOOST_INCLUDE_DIR}) # NOTE(review): the parent CMakeLists.txt adds ${Boost_INCLUDE_DIRS}; confirm BOOST_INCLUDE_DIR is defined somewhere, otherwise this expands to nothing
++# flags applied only to the library's own sources (see set_source_files_properties below)
++set(local_flags "-Wall -pthread")
++# sources that make up the dmclock static library
++set(dmc_srcs dmclock_util.cc ../support/src/run_every.cc)
++
++set_source_files_properties(${dmc_srcs}
++  PROPERTIES
++  COMPILE_FLAGS "${local_flags}"
++  )
++# NOTE(review): warnings_off is set per compiler here but is not referenced again in this file -- confirm whether it was meant to be appended to local_flags
++if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
++  set(warnings_off " -Wno-unused-variable -Wno-unused-function")
++elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
++  set(warnings_off " -Wno-unused-but-set-variable -Wno-unused-function")
++endif()
++
++add_library(dmclock STATIC ${dmc_srcs})
diff --cc src/dmclock/src/dmclock_client.h
index 00000000000,00000000000..b44e1211b53
new file mode 100644
--- /dev/null
+++ b/src/dmclock/src/dmclock_client.h
@@@ -1,0 -1,0 +1,194 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2017 Red Hat Inc.
++ */
++
++
++#pragma once
++
++#include <map>
++#include <deque>
++#include <chrono>
++#include <thread>
++#include <mutex>
++#include <condition_variable>
++
++#include "run_every.h"
++#include "dmclock_util.h"
++#include "dmclock_recs.h"
++
++#include "gtest/gtest_prod.h"
++
++
++namespace crimson {
++  namespace dmclock {
++    // Bookkeeping a ServiceTracker keeps for one server.
++    struct ServerInfo {
++      // tracker-wide delta / rho counter values recorded at the most
++      // recent request to this server
++      Counter   delta_prev_req;
++      Counter   rho_prev_req;
++      // responses received from this server since that request: all
++      // of them (my_delta) and the reservation-phase ones (my_rho)
++      uint32_t  my_delta = 0;
++      uint32_t  my_rho = 0;
++
++      ServerInfo(Counter _delta_prev_req,
++		 Counter _rho_prev_req) :
++	delta_prev_req(_delta_prev_req),
++	rho_prev_req(_rho_prev_req)
++      {
++	// my_delta and my_rho keep their in-class initial value of 0
++      }
++
++      // A request is being sent: remember the tracker's current
++      // counters and start counting this server's responses afresh.
++      inline void req_update(Counter delta, Counter rho) {
++	my_rho = 0;
++	my_delta = 0;
++	rho_prev_req = rho;
++	delta_prev_req = delta;
++      }
++
++      // A response arrived: it always counts towards my_delta, and
++      // towards my_rho as well when it was a reservation-phase one.
++      inline void resp_update(PhaseType phase) {
++	my_delta += 1;
++	if (PhaseType::reservation != phase) {
++	  return;
++	}
++	my_rho += 1;
++      }
++    };
++
++
++    // Client-side tracker that produces the delta/rho values sent along
++    // with each request. It counts every response the client receives
++    // and remembers, per server, where those counts stood when the
++    // client last sent that server a request.
++    //
++    // S is server identifier type
++    template<typename S>
++    class ServiceTracker {
++      FRIEND_TEST(dmclock_client, server_erase);
++
++      using TimePoint = decltype(std::chrono::steady_clock::now());
++      using Duration = std::chrono::milliseconds;
++      // a moment in time paired with the value delta_counter had then
++      using MarkPoint = std::pair<TimePoint,Counter>;
++
++      Counter                 delta_counter; // # reqs completed
++      Counter                 rho_counter;   // # reqs completed via reservation
++      std::map<S,ServerInfo>  server_map;
++      mutable std::mutex      data_mtx;      // protects Counters and map
++
++      using DataGuard = std::lock_guard<decltype(data_mtx)>;
++
++      // clean config
++
++      std::deque<MarkPoint>     clean_mark_points;
++      Duration                  clean_age;     // age at which ServerInfo cleaned
++
++      // NB: All threads declared at end, so they're destructed first!
++
++      std::unique_ptr<RunEvery> cleaning_job;
++
++
++    public:
++
++      // _clean_every is the period at which do_clean runs; _clean_age
++      // is how old a mark point must be before the server records it
++      // covers are dropped.
++      //
++      // we have to start the counters at 1, as 0 is used in the
++      // cleaning process
++      template<typename Rep, typename Per>
++      ServiceTracker(std::chrono::duration<Rep,Per> _clean_every,
++		     std::chrono::duration<Rep,Per> _clean_age) :
++	delta_counter(1),
++	rho_counter(1),
++	clean_age(std::chrono::duration_cast<Duration>(_clean_age))
++      {
++	cleaning_job =
++	  std::unique_ptr<RunEvery>(
++	    new RunEvery(_clean_every,
++			 std::bind(&ServiceTracker::do_clean, this)));
++      }
++
++
++      // the reason we're overloading the constructor rather than
++      // using default values for the arguments is so that callers
++      // have to either use all defaults or specify all timings; with
++      // default arguments they could specify some without others
++      ServiceTracker() :
++	ServiceTracker(std::chrono::minutes(5), std::chrono::minutes(10))
++      {
++	// empty
++      }
++
++
++      /*
++       * Incorporates the response from server_id, completed in the
++       * given phase, into the per-server record and into the
++       * tracker-wide counters.
++       */
++      void track_resp(const S& server_id, const PhaseType& phase) {
++	DataGuard g(data_mtx);
++
++	auto it = server_map.find(server_id);
++	if (server_map.end() == it) {
++	  // this code can only run if a request did not precede the
++	  // response or if the record was cleaned up b/w when
++	  // the request was made and now
++	  ServerInfo si(delta_counter, rho_counter);
++	  si.resp_update(phase);
++	  server_map.emplace(server_id, si);
++	} else {
++	  it->second.resp_update(phase);
++	}
++
++	++delta_counter;
++	if (PhaseType::reservation == phase) {
++	  ++rho_counter;
++	}
++      }
++
++
++      /*
++       * Returns the ReqParams for the given server.
++       *
++       * A server seen for the first time gets a fresh record and
++       * ReqParams(1, 1). Otherwise delta and rho are one plus the
++       * responses counted since the previous request to this server,
++       * excluding the ones that came from this server itself; the
++       * record is then reset to the current counters.
++       */
++      ReqParams get_req_params(const S& server) {
++	DataGuard g(data_mtx);
++	auto it = server_map.find(server);
++	if (server_map.end() == it) {
++	  server_map.emplace(server, ServerInfo(delta_counter, rho_counter));
++	  return ReqParams(1, 1);
++	} else {
++	  Counter delta =
++	    1 + delta_counter - it->second.delta_prev_req - it->second.my_delta;
++	  Counter rho =
++	    1 + rho_counter - it->second.rho_prev_req - it->second.my_rho;
++
++	  it->second.req_update(delta_counter, rho_counter);
++
++	  return ReqParams(uint32_t(delta), uint32_t(rho));
++	}
++      }
++
++    private:
++
++      /*
++       * This is being called regularly by RunEvery. Every time it's
++       * called it notes the time and delta counter (mark point) in a
++       * deque. It also looks at the deque to find the most recent
++       * mark point that is older than clean_age. It then walks the
++       * map and delete all server entries that were last used before
++       * that mark point.
++       */
++      void do_clean() {
++	TimePoint now = std::chrono::steady_clock::now();
++	DataGuard g(data_mtx);
++	clean_mark_points.emplace_back(MarkPoint(now, delta_counter));
++
++	// Discard every mark point that is at least clean_age old,
++	// remembering the counter value of the newest one discarded.
++	// The emptiness check matters when clean_age is zero or
++	// negative: then even the mark point just added qualifies, and
++	// calling front() on the emptied deque would be undefined
++	// behavior.
++	const auto cutoff = now - clean_age;
++	Counter earliest = 0;
++	while (!clean_mark_points.empty() &&
++	       clean_mark_points.front().first <= cutoff) {
++	  earliest = clean_mark_points.front().second;
++	  clean_mark_points.pop_front();
++	}
++
++	// the counters start at 1, so 0 means no mark point was old
++	// enough and there is nothing to clean
++	if (earliest > 0) {
++	  for (auto i = server_map.begin();
++	       i != server_map.end();
++	       /* empty */) {
++	    if (i->second.delta_prev_req <= earliest) {
++	      // erase() returns the iterator following the removed entry
++	      i = server_map.erase(i);
++	    } else {
++	      ++i;
++	    }
++	  }
++	}
++      } // do_clean
++    }; // class ServiceTracker
++  }
++}
diff --cc src/dmclock/src/dmclock_recs.h
index 00000000000,00000000000..f7a5aaadb10
new file mode 100644
--- /dev/null
+++ b/src/dmclock/src/dmclock_recs.h
@@@ -1,0 -1,0 +1,61 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2017 Red Hat Inc.
++ */
++
++
++#pragma once
++
++
++#include <ostream>
++#include <assert.h>
++
++
++namespace crimson {
++  namespace dmclock {
++    // unsigned 64-bit type used for the running delta / rho counts
++    using Counter = uint64_t;
++
++    // phase under which a request was completed
++    enum class PhaseType { reservation, priority };
++
++    // writes the phase's name, "reservation" or "priority", to out
++    inline std::ostream& operator<<(std::ostream& out, const PhaseType& phase) {
++      out << (PhaseType::reservation == phase ? "reservation" : "priority");
++      return out;
++    }
++
++    // The pair of counts sent along with a request. Copying is left to
++    // the compiler-generated members, which copy both fields; a
++    // hand-written copy constructor would only repeat that while
++    // making the type non-trivially-copyable.
++    struct ReqParams {
++      // count of all replies since last request; MUSTN'T BE 0
++      uint32_t delta;
++
++      // count of reservation replies since last request; MUSTN'T BE 0
++      uint32_t rho;
++
++      // both counts must be non-zero and rho may not exceed delta;
++      // this is checked with assert, i.e. only in builds without NDEBUG
++      ReqParams(uint32_t _delta, uint32_t _rho) :
++	delta(_delta),
++	rho(_rho)
++      {
++	assert(0 != delta && 0 != rho && rho <= delta);
++      }
++
++      // the smallest valid parameters: delta == 1 and rho == 1
++      ReqParams() :
++	ReqParams(1, 1)
++      {
++	// empty
++      }
++
++      // prints as "ReqParams{ delta:<delta>, rho:<rho> }"
++      friend std::ostream& operator<<(std::ostream& out, const ReqParams& rp) {
++	out << "ReqParams{ delta:" << rp.delta <<
++	  ", rho:" << rp.rho << " }";
++	return out;
++      }
++    }; // struct ReqParams
++  } // namespace dmclock
++} // namespace crimson
diff --cc src/dmclock/src/dmclock_server.h
index 00000000000,00000000000..65013063fa7
new file mode 100644
--- /dev/null
+++ b/src/dmclock/src/dmclock_server.h
@@@ -1,0 -1,0 +1,1588 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2017 Red Hat Inc.
++ */
++
++
++#pragma once
++
++/* COMPILATION OPTIONS
++ *
++ * By default we include an optimization over the originally published
++ * dmclock algorithm using not the values of rho and delta that were
++ * sent in with a request but instead the most recent rho and delta
++ * values from the request's client. To restore the algorithm's
++ * original behavior, define DO_NOT_DELAY_TAG_CALC (i.e., compiler
++ * argument -DDO_NOT_DELAY_TAG_CALC).
++ *
++ * The prop_heap does not seem to be necessary. The only thing it
++ * would help with is quickly finding the minimum proportion/priority
++ * when an idle client became active. To have the code maintain the
++ * proportional heap, define USE_PROP_HEAP (i.e., compiler argument
++ * -DUSE_PROP_HEAP).
++ */
++
++#include <assert.h>
++
++#include <cmath>
++#include <memory>
++#include <map>
++#include <deque>
++#include <queue>
++#include <atomic>
++#include <mutex>
++#include <condition_variable>
++#include <thread>
++#include <iostream>
++#include <sstream>
++#include <limits>
++
++#include <boost/variant.hpp>
++
++#include "indirect_intrusive_heap.h"
++#include "run_every.h"
++#include "dmclock_util.h"
++#include "dmclock_recs.h"
++
++#ifdef PROFILE
++#include "profile.h"
++#endif
++
++#include "gtest/gtest_prod.h"
++
++
++namespace crimson {
++
++  namespace dmclock {
++
++    namespace c = crimson;
++
++    constexpr double max_tag = std::numeric_limits<double>::is_iec559 ?
++      std::numeric_limits<double>::infinity() :
++      std::numeric_limits<double>::max();
++    constexpr double min_tag = std::numeric_limits<double>::is_iec559 ?
++      -std::numeric_limits<double>::infinity() :
++      std::numeric_limits<double>::lowest();
++    constexpr uint tag_modulo = 1000000;
++
++    // Per-client reservation/weight/limit values plus their cached
++    // reciprocals.
++    struct ClientInfo {
++      const double reservation;  // minimum
++      const double weight;       // proportional
++      const double limit;        // maximum
++
++      // multiplicative inverses of above, which we use in calculations
++      // and don't want to recalculate repeatedly
++      const double reservation_inv;
++      const double weight_inv;
++      const double limit_inv;
++
++      // order parameters -- min, "normal", max; a zero parameter gets
++      // a zero inverse rather than a division by zero
++      ClientInfo(double _reservation, double _weight, double _limit) :
++	reservation(_reservation),
++	weight(_weight),
++	limit(_limit),
++	reservation_inv(0.0 != _reservation ? 1.0 / _reservation : 0.0),
++	weight_inv(     0.0 != _weight      ? 1.0 / _weight      : 0.0),
++	limit_inv(      0.0 != _limit       ? 1.0 / _limit       : 0.0)
++      {}
++
++      // std::fixed is sticky, so setting it once after the reservation
++      // value formats every later field (and leaves the stream) fixed
++      friend std::ostream& operator<<(std::ostream& out,
++				      const ClientInfo& client) {
++	out << "{ ClientInfo:: r:" << client.reservation << std::fixed;
++	out << " w:" << client.weight << " l:" << client.limit;
++	out << " 1/r:" << client.reservation_inv;
++	out << " 1/w:" << client.weight_inv;
++	out << " 1/l:" << client.limit_inv;
++	out << " }";
++	return out;
++      }
++    }; // class ClientInfo
++
++
++    // The reservation, proportion and limit tags of one request plus a
++    // ready flag (and an arrival time unless DO_NOT_DELAY_TAG_CALC).
++    struct RequestTag {
++      double reservation;
++      double proportion;
++      double limit;
++      bool   ready; // true when within limit
++#ifndef DO_NOT_DELAY_TAG_CALC
++      Time   arrival;
++#endif
++
++      // Derives the three tags from prev_tag via tag_calc; cost is
++      // added to the reservation tag only. A zero inverse in client
++      // pins reservation/proportion to max_tag and limit to min_tag.
++      RequestTag(const RequestTag& prev_tag,
++		 const ClientInfo& client,
++		 const ReqParams& req_params,
++		 const Time& time,
++		 const double cost = 0.0) :
++	reservation(cost + tag_calc(time,
++				    prev_tag.reservation,
++				    client.reservation_inv,
++				    req_params.rho,
++				    true)),
++	proportion(tag_calc(time,
++			    prev_tag.proportion,
++			    client.weight_inv,
++			    req_params.delta,
++			    true)),
++	limit(tag_calc(time,
++		       prev_tag.limit,
++		       client.limit_inv,
++		       req_params.delta,
++		       false)),
++	ready(false)
++#ifndef DO_NOT_DELAY_TAG_CALC
++	, arrival(time)
++#endif
++      {
++	assert(reservation < max_tag || proportion < max_tag);
++      }
++
++      // explicit values; _arrival is unused with DO_NOT_DELAY_TAG_CALC
++      RequestTag(double _res, double _prop, double _lim, const Time& _arrival) :
++	reservation(_res),
++	proportion(_prop),
++	limit(_lim),
++	ready(false)
++#ifndef DO_NOT_DELAY_TAG_CALC
++	, arrival(_arrival)
++#endif
++      {
++	assert(reservation < max_tag || proportion < max_tag);
++      }
++
++      // member-wise copy, including ready (and arrival when present)
++      RequestTag(const RequestTag& other) = default;
++
++      // "same" for equal values, else "<before>=><after>" via format_tag
++      static std::string format_tag_change(double before, double after) {
++	if (before == after) {
++	  return std::string("same");
++	}
++	std::stringstream ss;
++	ss << format_tag(before) << "=>" << format_tag(after);
++	return ss.str();
++      }
++
++      // "max"/"min" for the extreme tags, else format_time's rendering
++      static std::string format_tag(double value) {
++	if (max_tag == value) {
++	  return std::string("max");
++	}
++	if (min_tag == value) {
++	  return std::string("min");
++	}
++	return format_time(value, tag_modulo);
++      }
++
++    private:
++
++      // A zero increment yields the extreme tag (max_tag when
++      // extreme_is_high, else min_tag). Otherwise the increment is scaled
++      // by dist_req_val (unless 0); returns max(time, prev + increment).
++      static double tag_calc(const Time& time,
++			     double prev,
++			     double increment,
++			     uint32_t dist_req_val,
++			     bool extreme_is_high) {
++	if (0.0 == increment) {
++	  return extreme_is_high ? max_tag : min_tag;
++	}
++	const double step =
++	  (0 != dist_req_val) ? increment * dist_req_val : increment;
++	return std::max(time, prev + step);
++      }
++
++      // prints the ready flag and the three tags; arrival is omitted
++      friend std::ostream& operator<<(std::ostream& out,
++				      const RequestTag& tag) {
++	out <<
++	  "{ RequestTag:: ready:" << (tag.ready ? "true" : "false") <<
++	  " r:" << format_tag(tag.reservation) <<
++	  " p:" << format_tag(tag.proportion) <<
++	  " l:" << format_tag(tag.limit) <<
++#if 0 // try to resolve this to make sure Time is operator<<'able.
++#ifndef DO_NOT_DELAY_TAG_CALC
++	  " arrival:" << tag.arrival <<
++#endif
++#endif
++	  " }";
++	return out;
++      }
++    }; // class RequestTag
++
++
++    // C is client identifier type, R is request type, B is heap
++    // branching factor
++    template<typename C, typename R, uint B>
++    class PriorityQueueBase {
++      FRIEND_TEST(dmclock_server, client_idle_erase);
++
++    public:
++
++      using RequestRef = std::unique_ptr<R>;
++
++    protected:
++
++      using TimePoint = decltype(std::chrono::steady_clock::now());
++      using Duration = std::chrono::milliseconds;
++      using MarkPoint = std::pair<TimePoint,Counter>;
++
++      enum class ReadyOption {ignore, lowers, raises};
++
++      // forward decl for friend decls
++      template<double RequestTag::*, ReadyOption, bool>
++      struct ClientCompare;
++
++      class ClientReq {
++	friend PriorityQueueBase;
++
++	RequestTag tag;
++	C          client_id;
++	RequestRef request;
++
++      public:
++
++	ClientReq(const RequestTag& _tag,
++		  const C&          _client_id,
++		  RequestRef&&      _request) :
++	  tag(_tag),
++	  client_id(_client_id),
++	  request(std::move(_request))
++	{
++	  // empty
++	}
++
++	friend std::ostream& operator<<(std::ostream& out, const ClientReq& c) {
++	  out << "{ ClientReq:: tag:" << c.tag << " client:" <<
++	    c.client_id << " }";
++	  return out;
++	}
++      }; // class ClientReq
++
++    public:
++
++      // NOTE: ClientRec is in the "public" section for compatibility
++      // with g++ 4.8.4, which complains if it's not. By g++ 6.3.1
++      // ClientRec could be "protected" with no issue. [See comments
++      // associated with function submit_top_request.]
++      class ClientRec {
++	friend PriorityQueueBase<C,R,B>;
++
++	C                     client;
++	RequestTag            prev_tag;
++	std::deque<ClientReq> requests;
++
++	// amount added from the proportion tag as a result of
++	// an idle client becoming unidle
++	double                prop_delta = 0.0;
++
++	c::IndIntruHeapData   reserv_heap_data;
++	c::IndIntruHeapData   lim_heap_data;
++	c::IndIntruHeapData   ready_heap_data;
++#if USE_PROP_HEAP
++	c::IndIntruHeapData   prop_heap_data;
++#endif
++
++      public:
++
++	ClientInfo            info;
++	bool                  idle;
++	Counter               last_tick;
++	uint32_t              cur_rho;
++	uint32_t              cur_delta;
++
++	ClientRec(C _client,
++		  const ClientInfo& _info,
++		  Counter current_tick) :
++	  client(_client),
++	  prev_tag(0.0, 0.0, 0.0, TimeZero),
++	  info(_info),
++	  idle(true),
++	  last_tick(current_tick),
++	  cur_rho(1),
++	  cur_delta(1)
++	{
++	  // empty
++	}
++
++	inline const RequestTag& get_req_tag() const {
++	  return prev_tag;
++	}
++
++	static inline void assign_unpinned_tag(double& lhs, const double rhs) {
++	  if (rhs != max_tag && rhs != min_tag) {
++	    lhs = rhs;
++	  }
++	}
++
++	inline void update_req_tag(const RequestTag& _prev,
++				   const Counter& _tick) {
++	  assign_unpinned_tag(prev_tag.reservation, _prev.reservation);
++	  assign_unpinned_tag(prev_tag.limit, _prev.limit);
++	  assign_unpinned_tag(prev_tag.proportion, _prev.proportion);
++	  last_tick = _tick;
++	}
++
++	inline void add_request(const RequestTag& tag,
++				const C&          client_id,
++				RequestRef&&      request) {
++	  requests.emplace_back(ClientReq(tag, client_id, std::move(request)));
++	}
++
++	inline const ClientReq& next_request() const {
++	  return requests.front();
++	}
++
++	inline ClientReq& next_request() {
++	  return requests.front();
++	}
++
++	inline void pop_request() {
++	  requests.pop_front();
++	}
++
++	inline bool has_request() const {
++	  return !requests.empty();
++	}
++
++	inline size_t request_count() const {
++	  return requests.size();
++	}
++
++	// NB: because a deque is the underlying structure, this
++	// operation might be expensive
++	bool remove_by_req_filter_fw(std::function<bool(const R&)> filter_accum) {
++	  // front-to-back pass; each request is shown to filter_accum
++	  // exactly once and erased when it answers true
++	  const size_t count_before = requests.size();
++	  auto cursor = requests.begin();
++	  while (cursor != requests.end()) {
++	    if (!filter_accum(*cursor->request)) {
++	      ++cursor;
++	      continue;
++	    }
++	    cursor = requests.erase(cursor);
++	  }
++	  return requests.size() != count_before;
++	}
++
++	// NB: because a deque is the underlying structure, this
++	// operation might be expensive
++	bool remove_by_req_filter_bw(std::function<bool(const R&)> filter_accum) {
++	  bool any_removed = false; // set once anything has been erased
++	  for (auto i = requests.rbegin(); // visit from the back to the front
++	       i != requests.rend();
++	       /* no inc; the body either advances i or re-seats it */) {
++	    if (filter_accum(*i->request)) {
++	      any_removed = true;
++	      i = decltype(i){ requests.erase(std::next(i).base()) }; // std::next(i).base() is the forward iterator to *i
++	    } else {
++	      ++i;
++	    }
++	  }
++	  return any_removed;
++	}
++
++	inline bool
++	remove_by_req_filter(std::function<bool(const R&)> filter_accum,
++			     bool visit_backwards) {
++	  if (visit_backwards) {
++	    return remove_by_req_filter_bw(filter_accum);
++	  } else {
++	    return remove_by_req_filter_fw(filter_accum);
++	  }
++	}
++
++	friend std::ostream&
++	operator<<(std::ostream& out,
++		   const typename PriorityQueueBase<C,R,B>::ClientRec& e) {
++	  out << "{ ClientRec::" <<
++	    " client:" << e.client <<
++	    " prev_tag:" << e.prev_tag <<
++	    " req_count:" << e.requests.size() <<
++	    " top_req:";
++	  if (e.has_request()) {
++	    out << e.next_request();
++	  } else {
++	    out << "none";
++	  }
++	  out << " }";
++
++	  return out;
++	}
++      }; // class ClientRec
++
++      using ClientRecRef = std::shared_ptr<ClientRec>;
++
++      // when we try to get the next request, we'll be in one of three
++      // situations -- we'll have one to return, have one that can
++      // fire in the future, or not have any
++      enum class NextReqType { returning, future, none };
++
++      // specifies which queue next request will get popped from
++      enum class HeapId { reservation, ready };
++
++      // this is returned from next_req to tell the caller the situation
++      struct NextReq {
++	NextReqType type;
++	union {
++	  HeapId    heap_id;
++	  Time      when_ready;
++	};
++      };
++
++
++      // a function that can be called to look up client information
++      using ClientInfoFunc = std::function<ClientInfo(const C&)>;
++
++
++      bool empty() const {
++	DataGuard g(data_mtx);
++	return (resv_heap.empty() || ! resv_heap.top().has_request());
++      }
++
++
++      size_t client_count() const {
++	DataGuard g(data_mtx);
++	return resv_heap.size();
++      }
++
++
++      size_t request_count() const {
++	DataGuard g(data_mtx);
++	size_t total = 0;
++	for (auto i = resv_heap.cbegin(); i != resv_heap.cend(); ++i) {
++	  total += i->request_count();
++	}
++	return total;
++      }
++
++
++      // drops every queued request that filter_accum accepts; true if any
++      bool remove_by_req_filter(std::function<bool(const R&)> filter_accum,
++				bool visit_backwards = false) {
++	bool any_removed = false;
++	DataGuard g(data_mtx);
++	for (const auto& entry : client_map) { // no per-entry shared_ptr copy
++	  ClientRec& rec = *entry.second;
++	  if (rec.remove_by_req_filter(filter_accum, visit_backwards)) {
++	    resv_heap.adjust(rec);
++	    limit_heap.adjust(rec);
++	    ready_heap.adjust(rec);
++#if USE_PROP_HEAP
++	    prop_heap.adjust(rec);
++#endif
++	    any_removed = true;
++	  }
++	}
++	return any_removed;
++      }
++
++
++      // use as a default value when no accumulator is provided
++      static void request_sink(const R& req) {
++	// do nothing
++      }
++
++
++      // Passes each queued request of the client to accum (back to front
++      // when reverse), clears them and calls adjust on every heap.
++      void remove_by_client(const C& client,
++			    bool reverse = false,
++			    std::function<void (const R&)> accum = request_sink) {
++	DataGuard g(data_mtx);
++
++	const auto found = client_map.find(client);
++	if (client_map.end() == found) {
++	  return;
++	}
++
++	ClientRec& rec = *found->second;
++	auto& pending = rec.requests;
++	if (!reverse) {
++	  for (const auto& req : pending) {
++	    accum(*req.request);
++	  }
++	} else {
++	  for (auto r = pending.rbegin(); r != pending.rend(); ++r) {
++	    accum(*r->request);
++	  }
++	}
++	pending.clear();
++
++	resv_heap.adjust(rec);
++	limit_heap.adjust(rec);
++	ready_heap.adjust(rec);
++#if USE_PROP_HEAP
++	prop_heap.adjust(rec);
++#endif
++      }
++
++
++      uint get_heap_branching_factor() const {
++	return B;
++      }
++
++
++      friend std::ostream& operator<<(std::ostream& out,
++				      const PriorityQueueBase& q) {
++	std::lock_guard<decltype(q.data_mtx)> guard(q.data_mtx);
++
++	out << "{ PriorityQueue::";
++	for (const auto& c : q.client_map) {
++	  out << "  { client:" << c.first << ", record:" << *c.second <<
++	    " }";
++	}
++	if (!q.resv_heap.empty()) {
++	  const auto& resv = q.resv_heap.top();
++	  out << " { reservation_top:" << resv << " }";
++	  const auto& ready = q.ready_heap.top();
++	  out << " { ready_top:" << ready << " }";
++	  const auto& limit = q.limit_heap.top();
++	  out << " { limit_top:" << limit << " }";
++	} else {
++	  out << " HEAPS-EMPTY";
++	}
++	out << " }";
++
++	return out;
++      }
++
++      // for debugging
++      void display_queues(std::ostream& out,
++			  bool show_res = true,
++			  bool show_lim = true,
++			  bool show_ready = true,
++			  bool show_prop = true) const {
++	auto filter = [](const ClientRec& e)->bool { return true; };
++	DataGuard g(data_mtx);
++	if (show_res) {
++	  resv_heap.display_sorted(out << "RESER:", filter);
++	}
++	if (show_lim) {
++	  limit_heap.display_sorted(out << "LIMIT:", filter);
++	}
++	if (show_ready) {
++	  ready_heap.display_sorted(out << "READY:", filter);
++	}
++#if USE_PROP_HEAP
++	if (show_prop) {
++	  prop_heap.display_sorted(out << "PROPO:", filter);
++	}
++#endif
++      } // display_queues
++
++
++    protected:
++
++      // The ClientCompare functor is essentially doing a precedes?
++      // operator, returning true if and only if the first parameter
++      // must precede the second parameter. If the second must precede
++      // the first, or if they are equivalent, false should be
++      // returned. The reason for this behavior is that it will be
++      // called to test if two items are out of order and if true is
++      // returned it will reverse the items. Therefore false is the
++      // default return when it doesn't matter to prevent unnecessary
++      // re-ordering.
++      //
++      // The template is supporting variations in sorting based on the
++      // heap in question and allowing these variations to be handled
++      // at compile-time.
++      //
++      // tag_field determines which tag is being used for comparison
++      //
++      // ready_opt determines how the ready flag influences the sort
++      //
++      // use_prop_delta determines whether the proportional delta is
++      // added in for comparison
++      template<double RequestTag::*tag_field,
++	       ReadyOption ready_opt,
++	       bool use_prop_delta>
++      struct ClientCompare {
++	// returns true iff n1 must precede n2; every "don't care" case
++	// yields false
++	bool operator()(const ClientRec& n1, const ClientRec& n2) const {
++	  // a client without requests never precedes anything ...
++	  if (!n1.has_request()) {
++	    return false;
++	  }
++
++	  // ... while one with requests precedes any client without
++	  if (!n2.has_request()) {
++	    return true;
++	  }
++
++	  const auto& t1 = n1.next_request().tag;
++	  const auto& t2 = n2.next_request().tag;
++
++	  // unless ready is ignored, differing ready flags settle the
++	  // order: "raises" favors the ready side, "lowers" the side
++	  // that is not ready
++	  if (ReadyOption::ignore != ready_opt && t1.ready != t2.ready) {
++	    return ReadyOption::raises == ready_opt ? t1.ready : t2.ready;
++	  }
++
++	  // otherwise compare the selected tag, shifted by each client's
++	  // prop_delta when use_prop_delta is set
++	  if (!use_prop_delta) {
++	    return t1.*tag_field < t2.*tag_field;
++	  }
++	  return (t1.*tag_field + n1.prop_delta) <
++	    (t2.*tag_field + n2.prop_delta);
++	}
++      };
++
++      ClientInfoFunc       client_info_f;
++
++      mutable std::mutex data_mtx;
++      using DataGuard = std::lock_guard<decltype(data_mtx)>;
++
++      // stable mapping between client ids and client queues
++      std::map<C,ClientRecRef> client_map;
++
++      c::IndIntruHeap<ClientRecRef,
++		      ClientRec,
++		      &ClientRec::reserv_heap_data,
++		      ClientCompare<&RequestTag::reservation,
++				    ReadyOption::ignore,
++				    false>,
++		      B> resv_heap;
++#if USE_PROP_HEAP
++      c::IndIntruHeap<ClientRecRef,
++		      ClientRec,
++		      &ClientRec::prop_heap_data,
++		      ClientCompare<&RequestTag::proportion,
++				    ReadyOption::ignore,
++				    true>,
++		      B> prop_heap;
++#endif
++      c::IndIntruHeap<ClientRecRef,
++		      ClientRec,
++		      &ClientRec::lim_heap_data,
++		      ClientCompare<&RequestTag::limit,
++				    ReadyOption::lowers,
++				    false>,
++		      B> limit_heap;
++      c::IndIntruHeap<ClientRecRef,
++		      ClientRec,
++		      &ClientRec::ready_heap_data,
++		      ClientCompare<&RequestTag::proportion,
++				    ReadyOption::raises,
++				    true>,
++		      B> ready_heap;
++
++      // if all reservations are met and all other requests are under
++      // limit, this will allow the request next in terms of
++      // proportion to still get issued
++      bool             allow_limit_break;
++
++      std::atomic_bool finishing;
++
++      // every request creates a tick
++      Counter tick = 0;
++
++      // performance data collection
++      size_t reserv_sched_count = 0;
++      size_t prop_sched_count = 0;
++      size_t limit_break_sched_count = 0;
++
++      Duration                  idle_age;
++      Duration                  erase_age;
++      Duration                  check_time;
++      std::deque<MarkPoint>     clean_mark_points;
++
++      // NB: All threads declared at end, so they're destructed first!
++
++      std::unique_ptr<RunEvery> cleaning_job;
++
++
++      // COMMON constructor that others feed into; we can accept three
++      // different variations of durations
++      template<typename Rep, typename Per>
++      PriorityQueueBase(ClientInfoFunc _client_info_f,
++			std::chrono::duration<Rep,Per> _idle_age,
++			std::chrono::duration<Rep,Per> _erase_age,
++			std::chrono::duration<Rep,Per> _check_time,
++			bool _allow_limit_break) :
++	client_info_f(_client_info_f),
++	allow_limit_break(_allow_limit_break),
++	finishing(false),
++	idle_age(std::chrono::duration_cast<Duration>(_idle_age)),
++	erase_age(std::chrono::duration_cast<Duration>(_erase_age)),
++	check_time(std::chrono::duration_cast<Duration>(_check_time))
++      {
++	assert(_erase_age >= _idle_age);
++	assert(_check_time < _idle_age);
++	cleaning_job =
++	  std::unique_ptr<RunEvery>(
++	    new RunEvery(check_time,
++			 std::bind(&PriorityQueueBase::do_clean, this)));
++      }
++
++
++      ~PriorityQueueBase() {
++	finishing = true;
++      }
++
++
++      // data_mtx must be held by caller; tags the request and queues it
++      void do_add_request(RequestRef&&     request,
++			  const C&         client_id,
++			  const ReqParams& req_params,
++			  const Time       time,
++			  const double     cost = 0.0) {
++	++tick;
++
++	// this pointer will help us create a reference to a shared
++	// pointer, no matter which of two codepaths we take
++	ClientRec* temp_client;
++
++	auto client_it = client_map.find(client_id);
++	if (client_map.end() != client_it) {
++	  temp_client = &(*client_it->second); // address of obj of shared_ptr
++	} else {
++	  ClientInfo info = client_info_f(client_id);
++	  ClientRecRef client_rec =
++	    std::make_shared<ClientRec>(client_id, info, tick);
++	  resv_heap.push(client_rec);
++#if USE_PROP_HEAP
++	  prop_heap.push(client_rec);
++#endif
++	  limit_heap.push(client_rec);
++	  ready_heap.push(client_rec);
++	  client_map[client_id] = client_rec;
++	  temp_client = &(*client_rec); // address of obj of shared_ptr
++	}
++
++	// for convenience, we'll create a reference to the shared pointer
++	ClientRec& client = *temp_client;
++
++	if (client.idle) {
++	  // We need to do an adjustment so that idle clients compete
++	  // fairly on proportional tags since those tags may have
++	  // drifted from real-time. Either use the lowest existing
++	  // proportion tag -- O(1) -- or the client with the lowest
++	  // previous proportion tag -- O(n) where n = # clients.
++	  //
++	  // So we don't have to maintain a proportional queue that
++	  // keeps the minimum on proportional tag alone (we're
++	  // instead using a ready queue), we'll have to check each
++	  // client.
++	  //
++	  // The alternative would be to maintain a proportional queue
++	  // (define USE_PROP_HEAP) and do an O(1) operation here.
++
++	  // Was unable to confirm whether equality testing on
++	  // std::numeric_limits<double>::max() is guaranteed, so
++	  // we'll use a compile-time calculated trigger that is one
++	  // third the max, which should be much larger than any
++	  // expected organic value.
++	  constexpr double lowest_prop_tag_trigger =
++	    std::numeric_limits<double>::max() / 3.0;
++
++	  double lowest_prop_tag = std::numeric_limits<double>::max();
++	  for (auto const &c : client_map) {
++	    // don't use ourselves (or anything else that might be
++	    // listed as idle) since we're now in the map
++	    if (!c.second->idle) {
++	      double p;
++	      // use either lowest proportion tag or previous proportion tag
++	      if (c.second->has_request()) {
++		p = c.second->next_request().tag.proportion +
++		  c.second->prop_delta;
++	      } else {
++	        p = c.second->get_req_tag().proportion + c.second->prop_delta;
++	      }
++
++	      if (p < lowest_prop_tag) {
++		lowest_prop_tag = p;
++	      }
++	    }
++	  }
++
++	  // if this conditional does not fire, no non-idle client offered a tag below the trigger and prop_delta is left as is
++	  if (lowest_prop_tag < lowest_prop_tag_trigger) {
++	    client.prop_delta = lowest_prop_tag - time;
++	  }
++	  client.idle = false;
++	} // if this client was idle
++
++#ifndef DO_NOT_DELAY_TAG_CALC
++	RequestTag tag(0, 0, 0, time); // placeholder tags; only arrival is meaningful
++
++	if (!client.has_request()) {
++	  tag = RequestTag(client.get_req_tag(), client.info,
++			   req_params, time, cost);
++
++	  // copy tag to previous tag for client
++	  client.update_req_tag(tag, tick);
++	}
++#else
++	RequestTag tag(client.get_req_tag(), client.info, req_params, time, cost);
++	// copy tag to previous tag for client
++	client.update_req_tag(tag, tick);
++#endif
++
++	client.add_request(tag, client.client, std::move(request));
++	if (1 == client.requests.size()) {
++	  // NB: can the following 4 calls to adjust be changed
++	  // promote? Can adding a request ever demote a client in the
++	  // heaps?
++	  resv_heap.adjust(client);
++	  limit_heap.adjust(client);
++	  ready_heap.adjust(client);
++#if USE_PROP_HEAP
++	  prop_heap.adjust(client);
++#endif
++	}
++
++	client.cur_rho = req_params.rho;
++	client.cur_delta = req_params.delta;
++	// NOTE(review): for a client's first queued request these repeat the adjust calls made just above
++	resv_heap.adjust(client);
++	limit_heap.adjust(client);
++	ready_heap.adjust(client);
++#if USE_PROP_HEAP
++	prop_heap.adjust(client);
++#endif
++      } // do_add_request
++
++
++      // data_mtx should be held when called; top of heap should have
++      // a ready request
++      template<typename C1, IndIntruHeapData ClientRec::*C2, typename C3>
++      void pop_process_request(IndIntruHeap<C1, ClientRec, C2, C3, B>& heap,
++			       std::function<void(const C& client,
++						  RequestRef& request)> process) {
++	// pull everything needed out of the front request before popping:
++	// pop_front() destroys that element and invalidates references to it
++	ClientRec& top = heap.top();
++	RequestRef request = std::move(top.next_request().request);
++#ifndef DO_NOT_DELAY_TAG_CALC
++	const RequestTag popped_tag = top.next_request().tag;
++#endif
++	top.pop_request();
++#ifndef DO_NOT_DELAY_TAG_CALC
++	// tag the new front request, using the popped one as its predecessor
++	if (top.has_request()) {
++	  ClientReq& next_first = top.next_request();
++	  next_first.tag = RequestTag(popped_tag, top.info,
++				      ReqParams(top.cur_delta, top.cur_rho),
++				      next_first.tag.arrival);
++	  // copy tag to previous tag for client
++	  top.update_req_tag(next_first.tag, tick);
++	}
++#endif
++
++	resv_heap.demote(top);
++	limit_heap.adjust(top);
++#if USE_PROP_HEAP
++	prop_heap.demote(top);
++#endif
++	ready_heap.demote(top);
++
++	// hand the request to the caller's callback
++	process(top.client, request);
++      } // pop_process_request
++
++
++      // data_mtx should be held when called
++      void reduce_reservation_tags(ClientRec& client) {
++	for (auto& r : client.requests) { // pull each tag back by one reservation interval
++	  r.tag.reservation -= client.info.reservation_inv;
++
++#ifndef DO_NOT_DELAY_TAG_CALC
++	  // reduce only the front tag, because the later tags' values are invalid
++	  break;
++#endif
++	}
++	// don't forget to update previous tag
++	client.prev_tag.reservation -= client.info.reservation_inv;
++	resv_heap.promote(client); // the reservation tag only got smaller
++      }
++
++
++      // data_mtx should be held when called
++      void reduce_reservation_tags(const C& client_id) {
++	auto client_it = client_map.find(client_id);
++
++	// means the client was cleaned from map; should never happen
++	// as long as cleaning times are long enough
++	assert(client_map.end() != client_it);
++	reduce_reservation_tags(*client_it->second);
++      }
++
++
++      // data_mtx should be held when called
++      NextReq do_next_request(Time now) {
++	NextReq result; // heap_id is set for "returning", when_ready for "future"
++
++	// if reservation queue is empty, all are empty (i.e., no active clients)
++	if(resv_heap.empty()) {
++	  result.type = NextReqType::none;
++	  return result;
++	}
++
++	// try constraint (reservation) based scheduling
++
++	auto& reserv = resv_heap.top();
++	if (reserv.has_request() &&
++	    reserv.next_request().tag.reservation <= now) {
++	  result.type = NextReqType::returning;
++	  result.heap_id = HeapId::reservation;
++	  return result;
++	}
++
++	// no existing reservations before now, so try weight-based
++	// scheduling
++
++	// all items that are within limit are eligible based on
++	// priority
++	auto limits = &limit_heap.top();
++	while (limits->has_request() &&
++	       !limits->next_request().tag.ready &&
++	       limits->next_request().tag.limit <= now) {
++	  limits->next_request().tag.ready = true;
++	  ready_heap.promote(*limits);
++	  limit_heap.demote(*limits);
++	  // the limit heap was just reordered, so look at its new top
++	  limits = &limit_heap.top();
++	}
++
++	auto& readys = ready_heap.top();
++	if (readys.has_request() &&
++	    readys.next_request().tag.ready &&
++	    readys.next_request().tag.proportion < max_tag) {
++	  result.type = NextReqType::returning;
++	  result.heap_id = HeapId::ready;
++	  return result;
++	}
++
++	// if nothing is schedulable by reservation or
++	// proportion/weight, and if we allow limit break, try to
++	// schedule something with the lowest proportion tag or
++	// alternatively lowest reservation tag.
++	if (allow_limit_break) {
++	  if (readys.has_request() &&
++	      readys.next_request().tag.proportion < max_tag) {
++	    result.type = NextReqType::returning;
++	    result.heap_id = HeapId::ready;
++	    return result;
++	  } else if (reserv.has_request() &&
++		     reserv.next_request().tag.reservation < max_tag) {
++	    result.type = NextReqType::returning;
++	    result.heap_id = HeapId::reservation;
++	    return result;
++	  }
++	}
++
++	// nothing scheduled; make sure we re-run when next
++	// reservation item or next limited item comes up
++	// (next_call stays TimeMax if neither heap offers a usable time)
++	Time next_call = TimeMax;
++	if (resv_heap.top().has_request()) {
++	  next_call =
++	    min_not_0_time(next_call,
++			   resv_heap.top().next_request().tag.reservation);
++	}
++	if (limit_heap.top().has_request()) {
++	  const auto& next = limit_heap.top().next_request();
++	  assert(!next.tag.ready || max_tag == next.tag.proportion);
++	  next_call = min_not_0_time(next_call, next.tag.limit);
++	}
++	if (next_call < TimeMax) {
++	  result.type = NextReqType::future;
++	  result.when_ready = next_call;
++	  return result;
++	} else {
++	  result.type = NextReqType::none;
++	  return result;
++	}
++      } // do_next_request
++
++
++      // if possible is not zero and less than current then return it;
++      // otherwise return current; the idea is we're trying to find
++      // the minimal time but ignoring zero
++      static inline const Time& min_not_0_time(const Time& current,
++					       const Time& possible) {
++	return TimeZero == possible ? current : std::min(current, possible);
++      }
++
++
++      /*
++       * This is being called regularly by RunEvery. Every time it's
++       * called it notes the time and tick counter (mark point) in a
++       * deque. It then drops the mark points at least erase_age old,
++       * finds the newest remaining one at least idle_age old, and walks
++       * the map to delete the client entries last used at or before the
++       * former and to flag as idle those at or before the latter.
++       */
++      void do_clean() {
++	TimePoint now = std::chrono::steady_clock::now();
++	DataGuard g(data_mtx);
++	clean_mark_points.emplace_back(MarkPoint(now, tick));
++
++	// first erase the super-old client records; the emptiness check
++	// matters because front() on an empty deque is undefined
++	Counter erase_point = 0;
++	while (!clean_mark_points.empty() &&
++	       clean_mark_points.front().first <= now - erase_age) {
++	  erase_point = clean_mark_points.front().second;
++	  clean_mark_points.pop_front();
++	}
++
++	// then find the newest remaining mark point at least idle_age old
++	Counter idle_point = 0;
++	for (const auto& mark : clean_mark_points) {
++	  if (mark.first > now - idle_age) {
++	    break;
++	  }
++	  idle_point = mark.second;
++	}
++
++	if (erase_point > 0 || idle_point > 0) {
++	  for (auto i = client_map.begin(); i != client_map.end(); /* empty */) {
++	    // advance first: erasing i2 must not invalidate the loop iterator
++	    auto i2 = i++;
++	    if (erase_point && i2->second->last_tick <= erase_point) {
++	      delete_from_heaps(i2->second);
++	      client_map.erase(i2);
++	    } else if (idle_point && i2->second->last_tick <= idle_point) {
++	      i2->second->idle = true;
++	    }
++	  } // for
++	} // if
++      } // do_clean
++
++
++      // data_mtx must be held by caller; does nothing if client is not in heap
++      template<IndIntruHeapData ClientRec::*C1,typename C2>
++      void delete_from_heap(ClientRecRef& client,
++			    c::IndIntruHeap<ClientRecRef,ClientRec,C1,C2,B>& heap) {
++	auto i = heap.rfind(client); // searches from the bottom of the heap
++	if (i != heap.end()) heap.remove(i); // end() holds an index past the last slot
++      }
++
++
++      // data_mtx must be held by caller; removes client from each heap below
++      void delete_from_heaps(ClientRecRef& client) {
++	delete_from_heap(client, resv_heap);
++#if USE_PROP_HEAP
++	delete_from_heap(client, prop_heap); // only compiled in when USE_PROP_HEAP is non-zero
++#endif
++	delete_from_heap(client, limit_heap);
++	delete_from_heap(client, ready_heap);
++      }
++    }; // class PriorityQueueBase
++
++
++    // PULL version: add_request only queues a request; the caller
++    // obtains scheduled requests by calling pull_request
++    template<typename C, typename R, uint B=2>
++    class PullPriorityQueue : public PriorityQueueBase<C,R,B> {
++      using super = PriorityQueueBase<C,R,B>;
++
++    public:
++
++      // When a request is pulled, this is the return type.
++      struct PullReq {
++	struct Retn {
++	  C                           client;
++	  typename super::RequestRef  request;
++	  PhaseType                   phase;
++	};
++
++	typename super::NextReqType   type; // none, returning or future
++	boost::variant<Retn,Time>     data; // Retn when returning, Time when future
++
++	bool is_none() const { return type == super::NextReqType::none; }
++
++	bool is_retn() const { return type == super::NextReqType::returning; }
++	Retn& get_retn() {
++	  return boost::get<Retn>(data);
++	}
++
++	bool is_future() const { return type == super::NextReqType::future; }
++	Time getTime() const { return boost::get<Time>(data); }
++      };
++
++#ifdef PROFILE
++      ProfileTimer<std::chrono::nanoseconds> pull_request_timer;
++      ProfileTimer<std::chrono::nanoseconds> add_request_timer;
++#endif
++
++      template<typename Rep, typename Per>
++      PullPriorityQueue(typename super::ClientInfoFunc _client_info_f,
++			std::chrono::duration<Rep,Per> _idle_age,
++			std::chrono::duration<Rep,Per> _erase_age,
++			std::chrono::duration<Rep,Per> _check_time,
++			bool _allow_limit_break = false) :
++	super(_client_info_f,
++	      _idle_age, _erase_age, _check_time,
++	      _allow_limit_break)
++      {
++	// empty
++      }
++
++      // pull convenience constructor
++      PullPriorityQueue(typename super::ClientInfoFunc _client_info_f,
++			bool _allow_limit_break = false) :
++	PullPriorityQueue(_client_info_f,
++			  std::chrono::minutes(10),
++			  std::chrono::minutes(15),
++			  std::chrono::minutes(6),
++			  _allow_limit_break)
++      {
++	// empty
++      }
++
++      // copies request into a new R and timestamps it with get_time()
++      inline void add_request(const R& request,
++			      const C& client_id,
++			      const ReqParams& req_params,
++			      double addl_cost = 0.0) {
++	add_request(typename super::RequestRef(new R(request)),
++		    client_id,
++		    req_params,
++		    get_time(),
++		    addl_cost);
++      }
++
++      // as above, but with default-constructed ReqParams
++      inline void add_request(const R& request,
++			      const C& client_id,
++			      double addl_cost = 0.0) {
++	static const ReqParams null_req_params;
++	add_request(typename super::RequestRef(new R(request)),
++		    client_id,
++		    null_req_params,
++		    get_time(),
++		    addl_cost);
++      }
++
++
++      // copies request into a new R; the caller supplies the time
++      inline void add_request_time(const R& request,
++				   const C& client_id,
++				   const ReqParams& req_params,
++				   const Time time,
++				   double addl_cost = 0.0) {
++	add_request(typename super::RequestRef(new R(request)),
++		    client_id,
++		    req_params,
++		    time,
++		    addl_cost);
++      }
++
++      // takes over request (must be moved on) and timestamps it with get_time()
++      inline void add_request(typename super::RequestRef&& request,
++			      const C& client_id,
++			      const ReqParams& req_params,
++			      double addl_cost = 0.0) {
++	add_request(std::move(request), client_id, req_params, get_time(), addl_cost);
++      }
++
++      // as above, but with default-constructed ReqParams
++      inline void add_request(typename super::RequestRef&& request,
++			      const C& client_id,
++			      double addl_cost = 0.0) {
++	static const ReqParams null_req_params;
++	add_request(std::move(request), client_id, null_req_params, get_time(), addl_cost);
++      }
++
++
++      // this does the work; the versions above provide alternate interfaces
++      void add_request(typename super::RequestRef&& request,
++		       const C&                     client_id,
++		       const ReqParams&             req_params,
++		       const Time                   time,
++		       double                       addl_cost = 0.0) {
++	typename super::DataGuard g(this->data_mtx);
++#ifdef PROFILE
++	add_request_timer.start();
++#endif
++	super::do_add_request(std::move(request),
++			      client_id,
++			      req_params,
++			      time,
++			      addl_cost);
++	// no call to schedule_request for pull version
++#ifdef PROFILE
++	add_request_timer.stop();
++#endif
++      }
++
++
++      inline PullReq pull_request() {
++	return pull_request(get_time());
++      }
++
++
++      PullReq pull_request(Time now) {
++	PullReq result;
++	typename super::DataGuard g(this->data_mtx);
++#ifdef PROFILE
++	pull_request_timer.start();
++#endif
++
++	typename super::NextReq next = super::do_next_request(now);
++	result.type = next.type;
++	switch(next.type) {
++	case super::NextReqType::none:
++	  return result; // NOTE(review): leaves pull_request_timer running under PROFILE -- confirm
++	  break;
++	case super::NextReqType::future:
++	  result.data = next.when_ready;
++	  return result; // NOTE(review): same early return without the timer stop
++	  break;
++	case super::NextReqType::returning:
++	  // to avoid nesting, break out and let code below handle this case
++	  break;
++	default:
++	  assert(false);
++	}
++
++	// we'll only get here if we're returning an entry
++
++	auto process_f =
++	  [&] (PullReq& pull_result, PhaseType phase) ->
++	  std::function<void(const C&,
++			     typename super::RequestRef&)> {
++	  return [&pull_result, phase](const C& client,
++				       typename super::RequestRef& request) {
++	    pull_result.data =
++	    typename PullReq::Retn{client, std::move(request), phase};
++	  };
++	};
++
++	switch(next.heap_id) {
++	case super::HeapId::reservation:
++	  super::pop_process_request(this->resv_heap,
++				     process_f(result, PhaseType::reservation));
++	  ++this->reserv_sched_count;
++	  break;
++	case super::HeapId::ready:
++	  super::pop_process_request(this->ready_heap,
++				     process_f(result, PhaseType::priority));
++	  { // need to use retn temporarily
++	    auto& retn = boost::get<typename PullReq::Retn>(result.data);
++	    super::reduce_reservation_tags(retn.client);
++	  }
++	  ++this->prop_sched_count;
++	  break;
++	default:
++	  assert(false);
++	}
++
++#ifdef PROFILE
++	pull_request_timer.stop();
++#endif
++	return result;
++      } // pull_request
++
++
++    protected:
++
++
++      // data_mtx should be held when called; this class declares no
++      // next_request(Time) overload, so go straight to the base
++      // class's do_next_request, as pull_request does
++      typename super::NextReq next_request() {
++	return super::do_next_request(get_time());
++      }
++    }; // class PullPriorityQueue
++
++
++    // PUSH version: schedulable requests are handed to handle_f
++    template<typename C, typename R, uint B=2>
++    class PushPriorityQueue : public PriorityQueueBase<C,R,B> {
++
++    protected:
++
++      using super = PriorityQueueBase<C,R,B>;
++
++    public:
++
++      // a function to see whether the server can handle another request
++      using CanHandleRequestFunc = std::function<bool(void)>;
++
++      // a function to submit a request to the server; it receives the
++      // client, the request itself and the phase it was scheduled in
++      using HandleRequestFunc =
++	std::function<void(const C&,typename super::RequestRef,PhaseType)>;
++
++    protected:
++
++      CanHandleRequestFunc can_handle_f;
++      HandleRequestFunc    handle_f;
++      // for handling timed scheduling
++      std::mutex  sched_ahead_mtx;
++      std::condition_variable sched_ahead_cv;
++      Time sched_ahead_when = TimeZero; // TimeZero means nothing is scheduled ahead
++
++#ifdef PROFILE
++    public:
++      ProfileTimer<std::chrono::nanoseconds> add_request_timer;
++      ProfileTimer<std::chrono::nanoseconds> request_complete_timer;
++    protected:
++#endif
++
++      // NB: threads declared last, so constructed last and destructed first
++
++      std::thread sched_ahead_thd;
++
++    public:
++
++      // push full constructor
++      template<typename Rep, typename Per>
++      PushPriorityQueue(typename super::ClientInfoFunc _client_info_f,
++			CanHandleRequestFunc _can_handle_f,
++			HandleRequestFunc _handle_f,
++			std::chrono::duration<Rep,Per> _idle_age,
++			std::chrono::duration<Rep,Per> _erase_age,
++			std::chrono::duration<Rep,Per> _check_time,
++			bool _allow_limit_break = false) :
++	super(_client_info_f,
++	      _idle_age, _erase_age, _check_time,
++	      _allow_limit_break)
++      {
++	can_handle_f = _can_handle_f;
++	handle_f = _handle_f;
++	sched_ahead_thd = std::thread(&PushPriorityQueue::run_sched_ahead, this);
++      }
++
++      // push convenience constructor
++      PushPriorityQueue(typename super::ClientInfoFunc _client_info_f,
++			CanHandleRequestFunc _can_handle_f,
++			HandleRequestFunc _handle_f,
++			bool _allow_limit_break = false) :
++	PushPriorityQueue(_client_info_f,
++			  _can_handle_f,
++			  _handle_f,
++			  std::chrono::minutes(10),
++			  std::chrono::minutes(15),
++			  std::chrono::minutes(6),
++			  _allow_limit_break)
++      {
++	// empty
++      }
++
++      ~PushPriorityQueue() {
++	{ // change the flag under sched_ahead_mtx or the wakeup below can be lost
++	  std::lock_guard<std::mutex> l(sched_ahead_mtx);
++	  this->finishing = true;
++	}
++	sched_ahead_cv.notify_one();
++	sched_ahead_thd.join();
++      }
++
++    public:
++
++      inline void add_request(const R& request,
++			      const C& client_id,
++			      const ReqParams& req_params,
++			      double addl_cost = 0.0) {
++	add_request(typename super::RequestRef(new R(request)),
++		    client_id,
++		    req_params,
++		    get_time(),
++		    addl_cost);
++      }
++
++      inline void add_request(typename super::RequestRef&& request,
++			      const C& client_id,
++			      const ReqParams& req_params,
++			      double addl_cost = 0.0) {
++	add_request(std::move(request), client_id, req_params, get_time(), addl_cost);
++      }
++
++
++      inline void add_request_time(const R& request,
++				   const C& client_id,
++				   const ReqParams& req_params,
++				   const Time time,
++				   double addl_cost = 0.0) {
++	add_request(typename super::RequestRef(new R(request)),
++		    client_id,
++		    req_params,
++		    time,
++		    addl_cost);
++      }
++
++      // this does the work; queues the request, then tries to schedule
++      void add_request(typename super::RequestRef&& request,
++		       const C&         client_id,
++		       const ReqParams& req_params,
++		       const Time       time,
++		       double           addl_cost = 0.0) {
++	typename super::DataGuard g(this->data_mtx);
++#ifdef PROFILE
++	add_request_timer.start();
++#endif
++	super::do_add_request(std::move(request),
++			      client_id,
++			      req_params,
++			      time,
++			      addl_cost);
++	schedule_request();
++#ifdef PROFILE
++	add_request_timer.stop();
++#endif
++      }
++
++      // takes data_mtx and tries to schedule another request
++      void request_completed() {
++	typename super::DataGuard g(this->data_mtx);
++#ifdef PROFILE
++	request_complete_timer.start();
++#endif
++	schedule_request();
++#ifdef PROFILE
++	request_complete_timer.stop();
++#endif
++      }
++
++    protected:
++
++      // data_mtx should be held when called; furthermore, the heap
++      // should not be empty and the top element of the heap should
++      // not be already handled
++      //
++      // NOTE: the use of "super::ClientRec" in either the template
++      // construct or as a parameter to submit_top_request generated
++      // a compiler error in g++ 4.8.4, when ClientRec was
++      // "protected" rather than "public". By g++ 6.3.1 this was not
++      // an issue. But for backwards compatibility
++      // PriorityQueueBase::ClientRec is public.
++      template<typename C1,
++	       IndIntruHeapData super::ClientRec::*C2,
++	       typename C3,
++	       uint B4>
++      C submit_top_request(IndIntruHeap<C1,typename super::ClientRec,C2,C3,B4>& heap,
++			   PhaseType phase) {
++	C client_result;
++	super::pop_process_request(heap,
++				   [this, phase, &client_result]
++				   (const C& client,
++				    typename super::RequestRef& request) {
++				     client_result = client;
++				     handle_f(client, std::move(request), phase);
++				   });
++	return client_result;
++      }
++
++
++      // data_mtx should be held when called
++      void submit_request(typename super::HeapId heap_id) {
++	C client;
++	switch(heap_id) {
++	case super::HeapId::reservation:
++	  // don't need to note client
++	  (void) submit_top_request(this->resv_heap, PhaseType::reservation);
++	  // unlike the other two cases, we do not reduce reservation
++	  // tags here
++	  ++this->reserv_sched_count;
++	  break;
++	case super::HeapId::ready:
++	  client = submit_top_request(this->ready_heap, PhaseType::priority);
++	  super::reduce_reservation_tags(client);
++	  ++this->prop_sched_count;
++	  break;
++	default:
++	  assert(false);
++	}
++      } // submit_request
++
++
++      // data_mtx should be held when called; unfortunately this
++      // function has to be repeated in both push & pull
++      // specializations
++      typename super::NextReq next_request() {
++	return next_request(get_time());
++      }
++
++
++      // data_mtx should be held when called; overrides member
++      // function in base class to add check for whether a request can
++      // be pushed to the server
++      typename super::NextReq next_request(Time now) {
++	if (!can_handle_f()) {
++	  typename super::NextReq result;
++	  result.type = super::NextReqType::none;
++	  return result;
++	} else {
++	  return super::do_next_request(now);
++	}
++      } // next_request
++
++
++      // data_mtx should be held when called
++      void schedule_request() {
++	typename super::NextReq next_req = next_request();
++	switch (next_req.type) {
++	case super::NextReqType::none:
++	  return;
++	case super::NextReqType::future:
++	  sched_at(next_req.when_ready);
++	  break;
++	case super::NextReqType::returning:
++	  submit_request(next_req.heap_id);
++	  break;
++	default:
++	  assert(false);
++	}
++      }
++
++
++      // this is the thread that handles running schedule_request at
++      // future times when nothing can be scheduled immediately
++      void run_sched_ahead() {
++	std::unique_lock<std::mutex> l(sched_ahead_mtx);
++
++	while (!this->finishing) {
++	  if (TimeZero == sched_ahead_when) {
++	    sched_ahead_cv.wait(l); // woken by sched_at or by the destructor
++	  } else {
++	    Time now;
++	    while (!this->finishing && (now = get_time()) < sched_ahead_when) {
++	      long microseconds_l = long(1 + 1000000 * (sched_ahead_when - now));
++	      auto microseconds = std::chrono::microseconds(microseconds_l);
++	      sched_ahead_cv.wait_for(l, microseconds);
++	    }
++	    sched_ahead_when = TimeZero;
++	    if (this->finishing) return;
++
++	    l.unlock(); // schedule_request may call sched_at, which locks sched_ahead_mtx
++	    if (!this->finishing) {
++	      typename super::DataGuard g(this->data_mtx);
++	      schedule_request();
++	    }
++	    l.lock();
++	  }
++	}
++      }
++
++      // records an earlier wake-up time (or the first one) and wakes the thread
++      void sched_at(Time when) {
++	std::lock_guard<std::mutex> l(sched_ahead_mtx);
++	if (TimeZero == sched_ahead_when || when < sched_ahead_when) {
++	  sched_ahead_when = when;
++	  sched_ahead_cv.notify_one();
++	}
++      }
++    }; // class PushPriorityQueue
++
++  } // namespace dmclock
++} // namespace crimson
diff --cc src/dmclock/src/dmclock_util.cc
index 00000000000,00000000000..865b60d42a8
new file mode 100644
--- /dev/null
+++ b/src/dmclock/src/dmclock_util.cc
@@@ -1,0 -1,0 +1,27 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2017 Red Hat Inc.
++ */
++
++
++#include <signal.h>
++
++#include <iomanip>
++#include <sstream>
++
++#include "dmclock_util.h"
++
++
++std::string crimson::dmclock::format_time(const Time& time, uint modulo) {
++  const long whole_part = long(time / modulo) * modulo; // multiple of modulo to strip off
++  std::ostringstream out;
++  out << std::fixed << std::setprecision(4) << (time - whole_part);
++  return out.str();
++}
++
++
++void crimson::dmclock::debugger() {
++  raise(SIGCONT); // signals this process; presumably a spot for a debugger to catch -- TODO confirm
++}
diff --cc src/dmclock/src/dmclock_util.h
index 00000000000,00000000000..d12c6f9eb63
new file mode 100644
--- /dev/null
+++ b/src/dmclock/src/dmclock_util.h
@@@ -1,0 -1,0 +1,45 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2017 Red Hat Inc.
++ */
++
++
++#pragma once
++
++
++#include <unistd.h>
++#include <assert.h>
++#include <sys/time.h>
++
++#include <limits>
++#include <cmath>
++#include <chrono>
++
++
++namespace crimson {
++  namespace dmclock {
++    // we're using double to represent time, but we could change it by
++    // changing the following declarations (and by making sure a min
++    // function existed)
++    using Time = double;
++    static const Time TimeZero = 0.0;
++    static const Time TimeMax = std::numeric_limits<Time>::max();
++    static const double NaN = nan("");
++
++
++    inline Time get_time() {
++      struct timeval tv;
++      const auto rc = gettimeofday(&tv, nullptr);
++      assert(0 == rc);
++      (void) rc; // rc is otherwise unused when assert is compiled out
++      return tv.tv_sec + (tv.tv_usec / 1000000.0); // seconds plus fractional microseconds
++    }
++
++    std::string format_time(const Time& time, uint modulo = 1000);
++
++    void debugger();
++
++  } // namespace dmclock
++} // namespace crimson
diff --cc src/dmclock/support/CMakeLists.txt
index 00000000000,00000000000..552439ebc59
new file mode 100644
--- /dev/null
+++ b/src/dmclock/support/CMakeLists.txt
@@@ -1,0 -1,0 +1,1 @@@
++add_subdirectory(test)
diff --cc src/dmclock/support/src/debug.h
index 00000000000,00000000000..2a78cc82309
new file mode 100644
--- /dev/null
+++ b/src/dmclock/support/src/debug.h
@@@ -1,0 -1,0 +1,17 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#pragma once
++
++
++#include <signal.h>
++
++
++inline void debugger() {
++    raise(SIGCONT); // signals this process; presumably a spot for a debugger to catch -- TODO confirm
++}
diff --cc src/dmclock/support/src/heap.h
index 00000000000,00000000000..0f4d24f7c2d
new file mode 100644
--- /dev/null
+++ b/src/dmclock/support/src/heap.h
@@@ -1,0 -1,0 +1,240 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#pragma once
++
++
++#include <vector>
++#include <ostream>
++
++#include "assert.h"
++
++
++namespace crimson {
++
++  /*
++   * T : type of data held in the heap.
++   *
++   * C : class that implements operator() with two arguments and
++   * returns true when the first argument is greater than (higher
++   * in priority than) the second.
++   */
++  template<typename T, typename C>
++  class Heap {
++
++  public:
++
++    class iterator {
++
++      friend Heap<T,C>;
++
++      Heap<T,C>& heap;
++      int        index;
++
++      iterator(Heap<T,C>& _heap, int _index) :
++	heap(_heap),
++	index(_index)
++      {
++	// empty
++      }
++
++    public:
++
++      iterator(iterator&& other) :
++	heap(other.heap),
++	index(other.index)
++      {
++	// empty
++      }
++
++      iterator& operator++() {
++	++index;
++	return *this;
++      }
++
++      bool operator==(const iterator& other) const {
++	return index == other.index; // only the position is compared, not the heap
++      }
++
++      bool operator!=(const iterator& other) const {
++	return !(*this == other);
++      }
++
++      T& operator*() {
++	return heap.data[index];
++      }
++
++      // sifts the item this iterator refers to up toward the top
++      void increase() {
++	heap.siftUp(index);
++      }
++    }; // class iterator
++
++    friend iterator;
++
++  protected:
++
++    std::vector<T> data;
++    int count;
++    C comparator;
++
++    // only called for i > 0; integer division truncates toward zero,
++    // so parent(0) would be 0 rather than a negative value
++    static inline int parent(int i) { return (i - 1) / 2; }
++
++    static inline int lhs(int i) { return 2*i + 1; }
++
++    static inline int rhs(int i) { return 2*i + 2; }
++
++    void siftUp(int i) {
++      assert(i < count);
++
++      while (i > 0) {
++	int pi = parent(i);
++	if (!comparator(data[i], data[pi])) {
++	  break;
++	}
++
++	std::swap(data[i], data[pi]);
++	i = pi;
++      }
++    }
++
++    void siftDown(int i) {
++      while (i < count) {
++	int li = lhs(i);
++	int ri = rhs(i);
++
++	if (li < count) {
++	  if (comparator(data[li], data[i])) {
++	    if (ri < count && comparator(data[ri], data[li])) {
++	      std::swap(data[i], data[ri]);
++	      i = ri;
++	    } else {
++	      std::swap(data[i], data[li]);
++	      i = li;
++	    }
++	  } else if (ri < count && comparator(data[ri], data[i])) {
++	    std::swap(data[i], data[ri]);
++	    i = ri;
++	  } else {
++	    break;
++	  }
++	} else {
++	  break;
++	}
++      }
++    }
++
++
++  public:
++
++    Heap() :
++      count(0)
++    {
++      // empty
++    }
++
++    // copies the elements, the count and also the comparator
++    Heap(const Heap<T,C>& other) :
++      data(other.data),
++      count(other.count),
++      comparator(other.comparator)
++    {
++    }
++
++    const Heap<T,C>& operator=(const Heap<T,C>& other) {
++      data.resize(other.data.size());
++      for (int i = 0; i < other.count; ++i) {
++	data[i] = other.data[i];
++      }
++      count = other.count;
++      return *this;
++    }
++
++    bool empty() const { return 0 == count; }
++
++    T& top() { assert(count > 0); return data[0]; }
++
++    void push(T item) {
++      int i = count++;
++      data.push_back(item);
++      siftUp(i);
++    }
++
++    void pop() {
++      assert(count > 0); // an empty heap would index data[-1] below
++      data[0] = data[--count];
++      data.resize(count);
++      siftDown(0);
++    }
++
++    // re-establishes heap order after the top item was changed in place
++    void updateTop() { siftDown(0); }
++
++    void clear() {
++      count = 0;
++      data.resize(0);
++    }
++
++    iterator begin() {
++      return iterator(*this, 0);
++    }
++
++    iterator end() {
++      return iterator(*this, count);
++    }
++
++    std::ostream& displaySorted(std::ostream& out,
++				std::function<bool(const T&)> filter,
++				bool insert_line_breaks = true) const {
++      Heap<T,C> temp = *this;
++
++      bool first = true;
++      out << "[ ";
++
++      while(!temp.empty()) {
++	const T& top = temp.top();
++	if (filter(top)) {
++	  if (!first) {
++	    out << ", ";
++	  }
++	  if (insert_line_breaks) {
++	    out << std::endl << "    ";
++	  }
++	  out << temp.top();
++	  first = false;
++	}
++	temp.pop();
++      }
++
++      out << " ]";
++      if (insert_line_breaks) {
++	out << std::endl;
++      }
++      return out;
++    }
++
++    template<typename T1, typename T2>
++    friend std::ostream& operator<<(std::ostream&, const Heap<T1,T2>&);
++  }; // class Heap
++
++
++  // prints the elements in storage order as "[ a, b, c ]"
++  template<typename T1, typename T2>
++  std::ostream& operator<<(std::ostream& out, const Heap<T1,T2>& h) {
++    out << "[ ";
++    for (int pos = 0; pos < h.count; ++pos) {
++      if (pos > 0) {
++	out << ", ";
++      }
++      out << h.data[pos];
++    }
++    return out << " ]";
++  }
++} // namespace
diff --cc src/dmclock/support/src/indirect_intrusive_heap.h
index 00000000000,00000000000..b6075bda22f
new file mode 100644
--- /dev/null
+++ b/src/dmclock/support/src/indirect_intrusive_heap.h
@@@ -1,0 -1,0 +1,549 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#pragma once
++
++
++#include <memory>
++#include <vector>
++#include <string>
++#include <iostream>
++#include <functional>
++#include <algorithm>
++
++#include "assert.h"
++
++
++namespace crimson {
++  using IndIntruHeapData = size_t;
++
++  /* T is the ultimate data that's being stored in the heap, although
++   *   through indirection.
++   *
++   * I is the indirect type that will actually be stored in the heap
++   *   and that must allow dereferencing (via operator*) to yield a
++   *   T&.
++   *
++   * C is a functor when given two T&'s will return true if the first
++   *   must precede the second.
++   *
++   * heap_info is a data member pointer as to where the heap data in T
++   * is stored.
++   *
++   * K is the branching factor of the heap, default is 2 (binary heap).
++   */
++  template<typename I,
++	   typename T,
++	   IndIntruHeapData T::*heap_info,
++	   typename C,
++	   uint K = 2>
++  class IndIntruHeap {
++
++    // shorthand
++    using HeapIndex = IndIntruHeapData;
++
++    static_assert(
++      std::is_same<T,typename std::pointer_traits<I>::element_type>::value,
++      "class I must resolve to class T by indirection (pointer dereference)");
++
++    static_assert(
++      std::is_same<bool,
++      typename std::result_of<C(const T&,const T&)>::type>::value,
++      "class C must define operator() to take two const T& and return a bool");
++
++    static_assert(K >= 2, "K (degree of branching) must be at least 2");
++
++    class Iterator {
++      friend IndIntruHeap<I, T, heap_info, C, K>;
++
++      IndIntruHeap<I, T, heap_info, C, K>* heap; // pointer, so assignment can rebind it
++      HeapIndex                            index;
++
++      Iterator(IndIntruHeap<I, T, heap_info, C, K>& _heap, HeapIndex _index) :
++	heap(&_heap),
++	index(_index)
++      {
++	// empty
++      }
++
++    public:
++      Iterator(Iterator&& other) :
++	heap(other.heap),
++	index(other.index)
++      {
++	// empty
++      }
++
++      Iterator(const Iterator& other) :
++	heap(other.heap),
++	index(other.index)
++      {
++	// empty
++      }
++
++      Iterator& operator=(Iterator&& other) {
++	heap = other.heap; // copies the position only; the heaps themselves are untouched
++	index = other.index;
++	return *this;
++      }
++
++      Iterator& operator=(const Iterator& other) {
++	heap = other.heap;
++	index = other.index;
++	return *this;
++      }
++
++      Iterator& operator++() {
++	if (index < heap->count) { // never step beyond end(), whose index is count
++	  ++index;
++	}
++	return *this;
++      }
++
++      bool operator==(const Iterator& other) const {
++	return heap == other.heap && index == other.index;
++      }
++
++      bool operator!=(const Iterator& other) const {
++	return !(*this == other);
++      }
++
++      T& operator*() {
++	return *heap->data[index];
++      }
++
++      T* operator->() {
++	return &(*heap->data[index]);
++      }
++
++#if 0
++      // the item this iterator refers to
++      void increase() {
++	heap->sift_up(index);
++      }
++#endif
++    }; // class Iterator
++
++
++    class ConstIterator {
++      friend IndIntruHeap<I, T, heap_info, C, K>;
++
++      const IndIntruHeap<I, T, heap_info, C, K>* heap; // pointer, so assignment can rebind it
++      HeapIndex                                  index;
++
++      ConstIterator(const IndIntruHeap<I, T, heap_info, C, K>& _heap,
++		    HeapIndex _index) :
++	heap(&_heap),
++	index(_index)
++      {
++	// empty
++      }
++
++    public:
++      ConstIterator(ConstIterator&& other) :
++	heap(other.heap),
++	index(other.index)
++      {
++	// empty
++      }
++
++      ConstIterator(const ConstIterator& other) :
++	heap(other.heap),
++	index(other.index)
++      {
++	// empty
++      }
++
++      ConstIterator& operator=(ConstIterator&& other) {
++	heap = other.heap; // copies the position only; swapping via a const reference cannot compile
++	index = other.index;
++	return *this;
++      }
++
++      ConstIterator& operator=(const ConstIterator& other) {
++	heap = other.heap;
++	index = other.index;
++	return *this;
++      }
++
++      ConstIterator& operator++() {
++	if (index < heap->count) { // never step beyond cend(), whose index is count
++	  ++index;
++	}
++	return *this;
++      }
++
++      bool operator==(const ConstIterator& other) const {
++	return heap == other.heap && index == other.index;
++      }
++
++      bool operator!=(const ConstIterator& other) const {
++	return !(*this == other);
++      }
++
++      const T& operator*() {
++	return *heap->data[index];
++      }
++
++      const T* operator->() {
++	return &(*heap->data[index]);
++      }
++    }; // class ConstIterator
++
++
++  protected:
++
++    std::vector<I> data;
++    HeapIndex      count;
++    C              comparator;
++
++  public:
++
++    // starts out with no items
++    IndIntruHeap() : count(0) { }
++
++    // appends the first other.count indirect items of other in order;
++    // the comparator is default-constructed rather than copied
++    IndIntruHeap(const IndIntruHeap<I,T,heap_info,C,K>& other) :
++      count(other.count)
++    {
++      auto src = other.data.begin();
++      for (HeapIndex left = other.count; left > 0; --left, ++src) {
++	data.push_back(*src);
++      }
++    }
++
++    bool empty() const { return 0 == count; }
++    // number of items currently in the heap
++    size_t size() const { return (size_t) count; }
++    // the accessors below read data[0] with no check that the heap is non-empty
++    T& top() { return *data[0]; } // the item itself
++
++    const T& top() const { return *data[0]; }
++
++    I& top_ind() { return data[0]; } // the indirect handle stored in the heap
++
++    const I& top_ind() const { return data[0]; }
++
++    // appends item in the last slot, stores that slot in the item, sifts up
++    void push(I&& item) {
++      const HeapIndex slot = count++;
++      intru_data_of(item) = slot;
++      data.emplace_back(std::move(item));
++      sift_up(slot);
++    }
++
++    void push(const I& item) {
++      push(I(item)); // the temporary copy selects the I&& overload
++    }
++
++    void pop() {
++      remove(0); // slot 0 is the top of the heap
++    }
++    // removes the item at i's position, then sets i to end()
++    void remove(Iterator& i) {
++      remove(i.index);
++      i = end(); // end() is taken after the removal, so it reflects the reduced count
++    }
++
++    Iterator find(const I& ind_item) {
++      // scan from the top; stopping at count is the same position as end()
++      HeapIndex pos = 0;
++      while (pos < count && !(data[pos] == ind_item)) {
++	++pos;
++      }
++      return Iterator(*this, pos);
++    }
++
++    // value overload: items are compared with T's operator==
++    Iterator find(const T& item) {
++      // scan from the top; stopping at count is the same position as end()
++      HeapIndex pos = 0;
++      while (pos < count && !(*data[pos] == item)) {
++	++pos;
++      }
++      return Iterator(*this, pos);
++    }
++
++    // reverse find -- the scan starts at the bottom of the heap
++    Iterator rfind(const I& ind_item) {
++      // HeapIndex is unsigned, so "past" stays one above the slot
++      // being examined; reaching 0 means nothing matched
++      HeapIndex past = count;
++      while (past > 0 && !(data[past-1] == ind_item)) {
++	--past;
++      }
++      if (0 == past) return end();
++      return Iterator(*this, past-1);
++    }
++
++    // reverse find by value -- the scan starts at the bottom of the heap
++    Iterator rfind(const T& item) {
++      // HeapIndex is unsigned, so "past" stays one above the slot
++      // being examined; reaching 0 means nothing matched
++      HeapIndex past = count;
++      while (past > 0 && !(*data[past-1] == item)) {
++	--past;
++      }
++      if (0 == past) return end();
++      return Iterator(*this, past-1);
++    }
++
++    ConstIterator find(const I& ind_item) const {
++      // scan from the top; stopping at count is the same position as cend()
++      HeapIndex pos = 0;
++      while (pos < count && !(data[pos] == ind_item)) {
++	++pos;
++      }
++      return ConstIterator(*this, pos);
++    }
++
++    // value overload: items are compared with T's operator==
++    ConstIterator find(const T& item) const {
++      // scan from the top; stopping at count is the same position as cend()
++      HeapIndex pos = 0;
++      while (pos < count && !(*data[pos] == item)) {
++	++pos;
++      }
++      return ConstIterator(*this, pos);
++    }
++
++    // reverse find -- the scan starts at the bottom of the heap
++    ConstIterator rfind(const I& ind_item) const {
++      // HeapIndex is unsigned, so "past" stays one above the slot
++      // being examined; reaching 0 means nothing matched
++      HeapIndex past = count;
++      while (past > 0 && !(data[past-1] == ind_item)) {
++	--past;
++      }
++      if (0 == past) return cend();
++      return ConstIterator(*this, past-1);
++    }
++
++    // reverse find by value -- the scan starts at the bottom of the heap
++    ConstIterator rfind(const T& item) const {
++      // HeapIndex is unsigned, so "past" stays one above the slot
++      // being examined; reaching 0 means nothing matched
++      HeapIndex past = count;
++      while (past > 0 && !(*data[past-1] == item)) {
++	--past;
++      }
++      if (0 == past) return cend();
++      return ConstIterator(*this, past-1);
++    }
++
++    void promote(T& item) {
++      sift_up(item.*heap_info); // item.*heap_info is the slot index stored inside the item
++    }
++    // presumably for an item that should now sit lower -- sift_down is not shown here
++    void demote(T& item) {
++      sift_down(item.*heap_info);
++    }
++    // presumably for an item that may need to move either way -- sift is not shown here
++    void adjust(T& item) {
++      sift(item.*heap_info);
++    }
++    // iterators walk the underlying array by index, not in priority order
++    Iterator begin() {
++      return Iterator(*this, 0);
++    }
++
++    Iterator end() {
++      return Iterator(*this, count); // one past the last used slot
++    }
++
++    ConstIterator cbegin() const {
++      return ConstIterator(*this, 0);
++    }
++
++    ConstIterator cend() const {
++      return ConstIterator(*this, count); // one past the last used slot
++    }
++
++    // writes the items in storage (array) order, separated by ", "
++    friend std::ostream& operator<<(std::ostream& out, const IndIntruHeap& h) {
++      auto i = h.data.cbegin();
++      if (i != h.data.cend()) {
++	out << **i;
++	// the loop header advances i; without that the loop never ends
++	for (++i; i != h.data.cend(); ++i) {
++	  out << ", " << **i;
++	}
++      }
++      return out;
++    }
++
++    // requires a copy-constructible I: sorts a copy of the indirect
++    // items with the heap's comparator, then prints those accepted by
++    // filter, separated by ", "
++    std::ostream&
++    display_sorted(std::ostream& out,
++		   std::function<bool(const T&)> filter = all_filter) const {
++      static_assert(std::is_copy_constructible<I>::value,
++		    "cannot call display_sorted when class I is not copy"
++		    " constructible");
++      std::vector<I> sorted(data);
++      std::sort(sorted.begin(), sorted.end(),
++		[this] (const I lhs_item, const I rhs_item) -> bool {
++		  return this->comparator(*lhs_item, *rhs_item);
++		});
++      bool need_separator = false;
++      for (const I& entry : sorted) {
++	if (!filter(*entry)) {
++	  continue;
++	}
++	if (need_separator) {
++	  out << ", ";
++	}
++	out << *entry;
++	need_separator = true;
++      }
++
++      return out;
++    }
++
++
++  protected:
++
++    // the heap-index field embedded in the element that item refers
++    // to, selected through the heap_info member pointer
++    static IndIntruHeapData& intru_data_of(I& item) {
++      auto& target = *item;
++      return target.*heap_info;
++    }
++
++    // removes the element stored in slot i (assumes i < count on
++    // entry -- TODO confirm callers guarantee this); the last element
++    // is moved into the vacated slot and then re-positioned
++    void remove(HeapIndex i) {
++      const HeapIndex last = --count;
++      if (i != last) {
++        std::swap(data[i], data[last]);
++        intru_data_of(data[i]) = i;
++      }
++      data.pop_back();
++
++      // when the removed element was itself the last one, slot i no
++      // longer exists; sifting it would read past the end of the vector
++      if (i < count) {
++        // the following needs to be sift (and not sift_down) as it can
++        // go up or down the heap; imagine the heap vector contains 0,
++        // 10, 100, 20, 30, 200, 300, 40; then 200 is removed, and 40
++        // would have to be sifted upwards
++        sift(i);
++      }
++    }
++
++    // default value of filter parameter to display_sorted; accepts
++    // every element
++    static bool all_filter(const T&) {
++      return true;
++    }
++
++    // slot of the parent of slot i in the K-ary layout; the top slot
++    // (i == 0) has no parent and, HeapIndex being unsigned, i - 1
++    // could not go negative anyway -- hence the assertion
++    static inline HeapIndex parent(HeapIndex i) {
++      assert(i != 0);
++      const HeapIndex shifted = i - 1;
++      return shifted / K;
++    }
++
++    // index of left child when K==2, index of left-most child when K>2
++    static inline HeapIndex lhs(HeapIndex i) {
++      return i * K + 1;
++    }
++
++    // index of right child when K==2, index of right-most child when K>2
++    static inline HeapIndex rhs(HeapIndex i) {
++      return i * K + K;
++    }
++
++    // moves the element in slot i toward the top for as long as the
++    // comparator says it precedes its parent, keeping the intrusive
++    // index of every moved element in step with its new slot
++    void sift_up(HeapIndex i) {
++      while (i != 0) {
++        const HeapIndex above = parent(i);
++        if (!comparator(*data[i], *data[above])) {
++          // parent already precedes (or ties with) this element
++          return;
++        }
++
++        std::swap(data[i], data[above]);
++        intru_data_of(data[above]) = above;
++        intru_data_of(data[i]) = i;
++        i = above;
++      }
++    } // sift_up
++
++    // sift_down for K>2: loops over all (up to K) children of the
++    // current slot to pick the one that precedes the others, then
++    // swaps downward while that child precedes the element; EnableBool
++    // exists only so the enable_if condition depends on a template
++    // parameter of this member template
++    template<bool EnableBool=true>
++    typename std::enable_if<(K>2)&&EnableBool,void>::type sift_down(HeapIndex i) {
++      if (i >= count) return;
++      for (;;) {
++        const HeapIndex first_child = lhs(i);
++        if (first_child >= count) {
++          // no children
++          return;
++        }
++
++        // the right-most child may lie beyond the end of the heap
++        const HeapIndex last_child = std::min(rhs(i), count - 1);
++
++        // find the child that precedes all its siblings
++        HeapIndex best = first_child;
++        for (HeapIndex c = first_child + 1; c <= last_child; ++c) {
++          if (comparator(*data[c], *data[best])) {
++            best = c;
++          }
++        }
++
++        if (!comparator(*data[best], *data[i])) {
++          // no child precedes the element; it is in place
++          return;
++        }
++
++        std::swap(data[i], data[best]);
++        intru_data_of(data[i]) = i;
++        intru_data_of(data[best]) = best;
++        i = best;
++      }
++    } // sift_down
++
++    // sift_down for K==2: at each step the element is swapped with the
++    // child that precedes it (the right child only when it also
++    // precedes the left one, or when the left one does not precede the
++    // element); EnableBool exists only so the enable_if condition
++    // depends on a template parameter of this member template
++    template<bool EnableBool=true>
++    typename std::enable_if<K==2&&EnableBool,void>::type sift_down(HeapIndex i) {
++      if (i >= count) return;
++      for (;;) {
++        const HeapIndex li = lhs(i);
++        const HeapIndex ri = 1 + li;
++
++        if (li >= count) {
++          // no children
++          return;
++        }
++
++        HeapIndex target;
++        if (comparator(*data[li], *data[i])) {
++          // left precedes the element; prefer right only if it
++          // precedes left as well
++          target = (ri < count && comparator(*data[ri], *data[li])) ? ri : li;
++        } else if (ri < count && comparator(*data[ri], *data[i])) {
++          target = ri;
++        } else {
++          // no child precedes the element; it is in place
++          return;
++        }
++
++        std::swap(data[i], data[target]);
++        intru_data_of(data[i]) = i;
++        intru_data_of(data[target]) = target;
++        i = target;
++      } // for
++    } // sift_down
++
++    // re-positions the element in slot i: it goes up when it precedes
++    // its parent, otherwise a downward sift is attempted; the top slot
++    // has no parent and so can only go down
++    void sift(HeapIndex i) {
++      if (i != 0 && comparator(*data[i], *data[parent(i)])) {
++        sift_up(i);
++      } else {
++        sift_down(i);
++      }
++    } // sift
++  }; // class IndIntruHeap
++
++} // namespace crimson
diff --cc src/dmclock/support/src/intrusive_heap.h
index 00000000000,00000000000..291e5798149
new file mode 100644
--- /dev/null
+++ b/src/dmclock/support/src/intrusive_heap.h
@@@ -1,0 -1,0 +1,214 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#pragma once
++
++
++#include <vector>
++#include <string>
++#include <iostream>
++#include <functional>
++
++#include "assert.h"
++
++
++namespace crimson {
++  using IntruHeapData = size_t;
++
++  // Binary heap that stores its elements by value and keeps, inside
++  // every element, the index of the slot the element currently
++  // occupies, so that an element whose key changed can be
++  // re-positioned (adjust, adjust_up, adjust_down) without searching.
++  //
++  // T = type of data in heap; I = functor that returns a non-const
++  // reference to IntruHeapData; C = functor that compares two const
++  // refs and return true if the first precedes the second
++  template<typename T, typename I, typename C>
++  class IntruHeap {
++
++    static_assert(
++      std::is_same<IntruHeapData&,typename std::result_of<I(T&)>::type>::value,
++      "class I must define operator() to take T& and return a IntruHeapData&.");
++
++    static_assert(
++      std::is_same<bool,typename std::result_of<C(const T&,const T&)>::type>::value,
++      "class C must define operator() to take two const T& and return a bool.");
++
++
++  protected:
++    using index_t = IntruHeapData;
++
++    std::vector<T> data;   // heap-ordered storage; slot 0 is the top
++    index_t count;         // number of elements, kept equal to data.size()
++    I intru_data_of;       // yields the index field embedded in an element
++    C comparator;          // true when its first argument precedes the second
++
++  public:
++
++    IntruHeap() :
++      count(0)
++    {
++      // empty
++    }
++
++    // member-wise copy; the functors are copied along with the data so
++    // that a copy made from a heap with stateful functors orders and
++    // indexes its elements the same way as the original
++    IntruHeap(const IntruHeap<T,I,C>& other) :
++      data(other.data),
++      count(other.count),
++      intru_data_of(other.intru_data_of),
++      comparator(other.comparator)
++    {
++      // empty
++    }
++
++    bool empty() const { return 0 == count; }
++
++    // element that precedes all others; must not be called on an
++    // empty heap
++    T& top() { return data[0]; }
++    const T& top() const { return data[0]; }
++
++    // takes ownership of item's contents: the index is recorded in
++    // item first, then item is moved (not copied) into the heap
++    void push(T&& item) {
++      index_t i = count++;
++      intru_data_of(item) = i;
++      data.emplace_back(std::move(item));
++      sift_up(i);
++    }
++
++    // copying overload; the caller's object is left untouched
++    void push(const T& item) {
++      T copy(item);
++      push(std::move(copy));
++    }
++
++    // removes the top element; must not be called on an empty heap
++    void pop() {
++      assert(!empty());
++      std::swap(data[0], data[--count]);
++      intru_data_of(data[0]) = 0;
++      data.pop_back();
++      sift_down(0);
++    }
++
++    // the three adjust calls expect a reference to an element stored
++    // in this heap, since the slot to sift from is read from the
++    // element's own index field
++    void adjust_up(T& item) {
++      sift_up(intru_data_of(item));
++    }
++
++    void adjust_down(T& item) {
++      sift_down(intru_data_of(item));
++    }
++
++    void adjust(T& item) {
++      sift(intru_data_of(item));
++    }
++
++    // writes the elements in storage (array) order, each one followed
++    // by ", "
++    friend std::ostream& operator<<(std::ostream& out, const IntruHeap& h) {
++      for (index_t i = 0; i < h.count; ++i) {
++        out << h.data[i] << ", ";
++      }
++      return out;
++    }
++
++    // writes the elements accepted by filter in comparator order,
++    // enclosed in "[ " and " ]"; works on a copy of the heap that is
++    // popped until empty, so the heap itself is not modified
++    std::ostream&
++    display_sorted(std::ostream& out,
++                   bool insert_line_breaks = true,
++                   std::function<bool(const T&)> filter = all_filter) const {
++      IntruHeap<T,I,C> copy = *this;
++
++      bool first = true;
++      out << "[ ";
++
++      while(!copy.empty()) {
++        const T& top = copy.top();
++        if (filter(top)) {
++          if (!first) {
++            out << ", ";
++          }
++          if (insert_line_breaks) {
++            out << std::endl << "    ";
++          }
++          out << top;
++          first = false;
++        }
++        copy.pop();
++      }
++
++      out << " ]";
++      if (insert_line_breaks) {
++        out << std::endl;
++      }
++
++      return out;
++    }
++
++
++  protected:
++
++    // default value of filter parameter to display_sorted; accepts
++    // every element
++    static bool all_filter(const T&) { return true; }
++
++    // slot of the parent of slot i; the top slot has no parent and
++    // index_t is unsigned, so i must not be 0
++    static inline index_t parent(index_t i) {
++      assert(0 != i);
++      return (i - 1) / 2;
++    }
++
++    static inline index_t lhs(index_t i) { return 2*i + 1; }
++
++    static inline index_t rhs(index_t i) { return 2*i + 2; }
++
++    // moves the element in slot i toward the top while it precedes
++    // its parent, updating the embedded index of each moved element
++    void sift_up(index_t i) {
++      while (i > 0) {
++        index_t pi = parent(i);
++        if (!comparator(data[i], data[pi])) {
++          break;
++        }
++
++        std::swap(data[i], data[pi]);
++        intru_data_of(data[i]) = i;
++        intru_data_of(data[pi]) = pi;
++        i = pi;
++      }
++    } // sift_up
++
++    // moves the element in slot i toward the bottom while one of its
++    // children precedes it; the right child is chosen only when it
++    // also precedes the left child (or the left does not qualify)
++    void sift_down(index_t i) {
++      while (i < count) {
++        index_t li = lhs(i);
++        index_t ri = rhs(i);
++
++        if (li < count) {
++          if (comparator(data[li], data[i])) {
++            if (ri < count && comparator(data[ri], data[li])) {
++              std::swap(data[i], data[ri]);
++              intru_data_of(data[i]) = i;
++              intru_data_of(data[ri]) = ri;
++              i = ri;
++            } else {
++              std::swap(data[i], data[li]);
++              intru_data_of(data[i]) = i;
++              intru_data_of(data[li]) = li;
++              i = li;
++            }
++          } else if (ri < count && comparator(data[ri], data[i])) {
++            std::swap(data[i], data[ri]);
++            intru_data_of(data[i]) = i;
++            intru_data_of(data[ri]) = ri;
++            i = ri;
++          } else {
++            // no child precedes the element
++            break;
++          }
++        } else {
++          // no children
++          break;
++        }
++      }
++    } // sift_down
++
++    void sift(index_t i) {
++      if (i == 0) {
++        // if we're at top, can only go down
++        sift_down(i);
++      } else {
++        index_t pi = parent(i);
++        if (comparator(data[i], data[pi])) {
++          // if we can go up, we will
++          sift_up(i);
++        } else {
++          // otherwise we'll try to go down
++          sift_down(i);
++        }
++      }
++    } // sift
++  }; // class IntruHeap
++} // namespace crimson
diff --cc src/dmclock/support/src/profile.h
index 00000000000,00000000000..77493c75be5
new file mode 100644
--- /dev/null
+++ b/src/dmclock/support/src/profile.h
@@@ -1,0 -1,0 +1,114 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#pragma once
++
++
++#include <cassert>
++#include <chrono>
++#include <cmath>
++
++
++namespace crimson {
++  // Running statistics over a series of durations. T is a
++  // std::chrono duration type; every stored value is a tick count
++  // (T::rep) of that duration type.
++  template<typename T>
++  class ProfileBase {
++
++  protected:
++
++    using clock = std::chrono::steady_clock;
++
++    unsigned int count = 0;           // number of samples recorded
++    typename T::rep sum = 0;          // sum of the samples
++    typename T::rep sum_squares = 0;  // sum of the squared samples
++    typename T::rep low = 0;          // smallest sample (valid if count > 0)
++    typename T::rep high = 0;         // largest sample (valid if count > 0)
++
++  public:
++
++    unsigned int get_count() const { return count; }
++    typename T::rep get_sum() const { return sum; }
++    typename T::rep get_low() const { return low; }
++    typename T::rep get_high() const { return high; }
++
++    // arithmetic mean of the samples; NaN when nothing was recorded
++    double get_mean() const {
++      if (0 == count) return std::nan("");
++      return sum / double(count);
++    }
++
++    // population standard deviation of the samples; NaN when nothing
++    // was recorded
++    double get_std_dev() const {
++      if (0 == count) return std::nan("");
++      // all arithmetic is done in double: count * count wraps to 0 in
++      // unsigned int once count reaches 65536, and sum * sum can
++      // overflow T::rep
++      const double n = double(count);
++      const double mean = double(sum) / n;
++      const double variance = double(sum_squares) / n - mean * mean;
++      // rounding can push a tiny variance slightly below zero
++      return variance > 0.0 ? std::sqrt(variance) : 0.0;
++    }
++  }; // class ProfileBase
++
++
++  // forward declaration for friend
++  template<typename T>
++  class ProfileCombiner;
++
++
++  // Measures intervals with a steady clock; each start()/stop() pair
++  // contributes one sample, converted to duration type T.
++  template<typename T>
++  class ProfileTimer : public ProfileBase<T> {
++    friend ProfileCombiner<T>;
++
++    using super = ProfileBase<T>;
++
++    bool is_timing = false;
++    typename super::clock::time_point start_time;
++
++  public:
++
++    ProfileTimer() {
++    }
++
++    // begins an interval; must not be called while one is in progress
++    void start() {
++      assert(!is_timing);
++      start_time = super::clock::now();
++      is_timing = true;
++    }
++
++    // ends the interval begun by start() and records it as a sample
++    void stop() {
++      assert(is_timing);
++      T duration = std::chrono::duration_cast<T>(super::clock::now() - start_time);
++      typename T::rep duration_count = duration.count();
++      this->sum += duration_count;
++      this->sum_squares += duration_count * duration_count;
++      if (0 == this->count) {
++        this->low = duration_count;
++        this->high = duration_count;
++      } else {
++        // a single sample cannot be both below low and above high
++        if (duration_count < this->low) this->low = duration_count;
++        else if (duration_count > this->high) this->high = duration_count;
++      }
++      ++this->count;
++      is_timing = false;
++    }
++  };  // class ProfileTimer
++
++
++  // Merges the statistics gathered by several ProfileTimer objects.
++  template<typename T>
++  class ProfileCombiner : public ProfileBase<T> {
++
++    using super = ProfileBase<T>;
++
++  public:
++
++    ProfileCombiner() {}
++
++    // folds the samples recorded by timer into this combiner
++    void combine(const ProfileTimer<T>& timer) {
++      if (0 == timer.count) {
++        // a timer without samples has no meaningful low/high; letting
++        // its zero-initialized low in would corrupt the minimum
++        return;
++      }
++      if (0 == this->count) {
++        this->low = timer.low;
++        this->high = timer.high;
++      } else {
++        // the two bounds are independent: one timer can extend the
++        // range at both ends, so both must be checked
++        if (timer.low < this->low) this->low = timer.low;
++        if (timer.high > this->high) this->high = timer.high;
++      }
++      this->count += timer.count;
++      this->sum += timer.sum;
++      this->sum_squares += timer.sum_squares;
++    }
++  }; // class ProfileCombiner
++} // namespace crimson
diff --cc src/dmclock/support/src/run_every.cc
index 00000000000,00000000000..258baaa74c0
new file mode 100644
--- /dev/null
+++ b/src/dmclock/support/src/run_every.cc
@@@ -1,0 -1,0 +1,73 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#include <iostream>
++
++#include "run_every.h"
++
++
++// can define ADD_MOVE_SEMANTICS, although not fully debugged and tested
++
++
++namespace chrono = std::chrono;
++
++
++#ifdef ADD_MOVE_SEMANTICS
++// default construction starts no thread; members only receive their
++// in-class initializers (or default initialization)
++crimson::RunEvery::RunEvery() = default;
++
++
++// Move assignment: stops this object's thread, adopts other's wait
++// period, body and finishing state, tells other's thread to finish,
++// and then starts a fresh thread for this object. other's thread is
++// not joined here.
++crimson::RunEvery& crimson::RunEvery::operator=(crimson::RunEvery&& other)
++{
++  if (this == &other) {
++    // assigning to ourselves would only stop and restart our thread
++    // after marking ourselves as finishing
++    return *this;
++  }
++
++  // finish run every thread
++  {
++    Guard g(mtx);
++    finishing = true;
++    cv.notify_one();
++  }
++  if (thd.joinable()) {
++    thd.join();
++  }
++
++  // our thread is gone, so our own fields can be written freely;
++  // other's fields are shared with other's thread and are therefore
++  // only touched while holding other's mutex
++  {
++    Guard g(other.mtx);
++
++    // transfer info over from previous thread; finishing is a plain
++    // bool, so it is assigned rather than store()d
++    finishing = other.finishing;
++    wait_period = other.wait_period;
++    // copied, not moved: other's thread may still invoke its body
++    body = other.body;
++
++    // finish other thread
++    other.finishing = true;
++    other.cv.notify_one();
++  }
++
++  // start this thread
++  thd = std::thread(&RunEvery::run, this);
++
++  return *this;
++}
++#endif
++
++
++// Tells the worker thread to finish and waits for it to exit.
++crimson::RunEvery::~RunEvery() {
++  {
++    // run() reads finishing while holding mtx; writing it under the
++    // same mutex avoids a data race and guarantees the notification
++    // cannot slip in between run()'s check and its wait
++    Guard g(mtx);
++    finishing = true;
++  }
++  cv.notify_all();
++  // an object that never started a thread has nothing to join, and
++  // join() on such a thread would throw from the destructor
++  if (thd.joinable()) {
++    thd.join();
++  }
++}
++
++
++// Thread body: waits wait_period, invokes body, and repeats until
++// finishing is set. mtx is held for the whole loop except while
++// blocked inside wait_until, so body runs with mtx locked.
++void crimson::RunEvery::run() {
++  Lock l(mtx);
++  for (;;) {
++    if (finishing) {
++      return;
++    }
++
++    const TimePoint deadline = chrono::steady_clock::now() + wait_period;
++    // loop guards against wake-ups that arrive before the deadline
++    // without finishing having been set
++    while (!finishing && chrono::steady_clock::now() < deadline) {
++      cv.wait_until(l, deadline);
++    }
++
++    if (finishing) {
++      return;
++    }
++    body();
++  }
++}
diff --cc src/dmclock/support/src/run_every.h
index 00000000000,00000000000..c3499da91ef
new file mode 100644
--- /dev/null
+++ b/src/dmclock/support/src/run_every.h
@@@ -1,0 -1,0 +1,68 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#pragma once
++
++#include <chrono>
++#include <mutex>
++#include <condition_variable>
++#include <thread>
++
++
++namespace crimson {
++  using std::chrono::duration_cast;
++  using std::chrono::milliseconds;
++
++  // Invokes a caller-supplied function object over and over on its
++  // own thread, pausing wait_period (kept in milliseconds) between
++  // invocations; destroying the object tells that thread to finish
++  // and waits for it
++  class RunEvery {
++    using Lock      = std::unique_lock<std::mutex>;
++    using Guard     = std::lock_guard<std::mutex>;
++    using TimePoint = std::chrono::steady_clock::time_point;
++
++    bool                      finishing{false};
++    std::chrono::milliseconds wait_period;
++    std::function<void()>     body;
++    std::mutex                mtx;
++    std::condition_variable   cv;
++
++    // the thread is declared last so that every member it uses has
++    // been constructed by the time it starts running
++
++    std::thread               thd;
++
++  public:
++
++#ifdef ADD_MOVE_SEMANTICS
++    RunEvery();
++#endif
++
++    // D is any duration type accepted by duration_cast; the thread
++    // running run() is started as the last step of construction
++    template<typename D>
++    RunEvery(D                     _wait_period,
++             std::function<void()> _body) :
++      wait_period(duration_cast<milliseconds>(_wait_period)),
++      body(_body),
++      thd(&RunEvery::run, this)
++    {
++      // empty
++    }
++
++    // neither copyable nor move constructible; move assignment exists
++    // only when ADD_MOVE_SEMANTICS is defined
++    RunEvery(const RunEvery& other) = delete;
++    RunEvery& operator=(const RunEvery& other) = delete;
++    RunEvery(RunEvery&& other) = delete;
++#ifdef ADD_MOVE_SEMANTICS
++    RunEvery& operator=(RunEvery&& other);
++#else
++    RunEvery& operator=(RunEvery&& other) = delete;
++#endif
++
++    ~RunEvery();
++
++  protected:
++
++    // body of the thread
++    void run();
++  };
++}
diff --cc src/dmclock/support/test/CMakeLists.txt
index 00000000000,00000000000..addea6c96a9
new file mode 100644
--- /dev/null
+++ b/src/dmclock/support/test/CMakeLists.txt
@@@ -1,0 -1,0 +1,29 @@@
++include_directories(../src)
++
++# flags applied to every test source listed below
++set(local_flags "-Wall -pthread")
++
++# dmclock does not use intrusive heap (but it does use indirect
++# intrusive heap), so the intrusive heap test is switched off
++if(false)
++  set(srcs test_intrusive_heap.cc)
++  add_executable(test_intru_heap ${srcs})
++  set_source_files_properties(${srcs}
++    PROPERTIES COMPILE_FLAGS "${local_flags}")
++endif()
++
++# tests for the indirect intrusive heap
++set(test_srcs test_indirect_intrusive_heap.cc)
++set_source_files_properties(${test_srcs}
++  PROPERTIES COMPILE_FLAGS "${local_flags}")
++
++add_executable(dmclock-data-struct-tests ${test_srcs})
++target_link_libraries(dmclock-data-struct-tests
++  LINK_PRIVATE gtest gtest_main pthread)
++
++# make the test binary known to CTest
++add_test(NAME dmclock-data-struct-tests
++  COMMAND $<TARGET_FILE:dmclock-data-struct-tests>)
diff --cc src/dmclock/support/test/test_ind_intru_heap.cc
index 00000000000,00000000000..9ec03b5cacf
new file mode 100644
--- /dev/null
+++ b/src/dmclock/support/test/test_ind_intru_heap.cc
@@@ -1,0 -1,0 +1,82 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#include <memory>
++#include <string>
++#include <iostream>
++
++#include "indirect_intrusive_heap.h"
++
++
++class TestCompare;
++
++
++// Heap element for this demo: an int payload plus the index field
++// that the heap is told to use through &Test1::heap_data.
++class Test1 {
++    friend TestCompare;
++
++    int data;
++
++public:
++
++    crimson::IndIntruHeapData heap_data;
++
++    Test1(int value) : data(value) {}
++
++    // prints the payload followed by the heap index in parentheses
++    friend std::ostream& operator<<(std::ostream& os, const Test1& elem) {
++        return os << elem.data << " (" << elem.heap_data << ")";
++    }
++
++    // mutable access to the payload
++    int& the_data() { return data; }
++};
++
++
++// Orders Test1 objects by ascending payload. The call operator is
++// const so that the comparator can also be invoked from const heap
++// members such as display_sorted.
++struct TestCompare {
++    bool operator()(const Test1& d1, const Test1& d2) const {
++        return d1.data < d2.data;
++    }
++};
++
++
++// Small manual exercise of IndIntruHeap: fills a heap, re-keys the
++// top element, then prints the heap after every pop.
++int main(int, char**) {
++    crimson::IndIntruHeap<std::shared_ptr<Test1>, Test1, &Test1::heap_data, TestCompare> my_heap;
++
++    const std::shared_ptr<Test1> d99 = std::make_shared<Test1>(99);
++
++    my_heap.push(std::make_shared<Test1>(2));
++    my_heap.push(d99);
++    my_heap.push(std::make_shared<Test1>(1));
++    my_heap.push(std::make_shared<Test1>(-5));
++    my_heap.push(std::make_shared<Test1>(12));
++    my_heap.push(std::make_shared<Test1>(-12));
++    my_heap.push(std::make_shared<Test1>(-7));
++
++    std::cout << my_heap << std::endl;
++
++    // raise the key of the top element and let it sink; demote is the
++    // heap's downward re-positioning call (the heap offers promote,
++    // demote and adjust)
++    auto& t = my_heap.top();
++    t.the_data() = 17;
++    my_heap.demote(t);
++
++    std::cout << my_heap << std::endl;
++
++    my_heap.display_sorted(std::cout);
++
++    while (!my_heap.empty()) {
++        auto& top = my_heap.top();
++        std::cout << top << std::endl;
++        my_heap.pop();
++        std::cout << my_heap << std::endl;
++    }
++
++    return 0;
++}
diff --cc src/dmclock/support/test/test_indirect_intrusive_heap.cc
index 00000000000,00000000000..23863a24ce9
new file mode 100644
--- /dev/null
+++ b/src/dmclock/support/test/test_indirect_intrusive_heap.cc
@@@ -1,0 -1,0 +1,930 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++#include <iostream>
++#include <memory>
++#include <set>
++
++#include "gtest/gtest.h"
++
++#include "indirect_intrusive_heap.h"
++
++
++// Element type used by the heap tests.
++struct Elem {
++  int data;  // the value the comparators order by
++
++  // heap index fields; a heap is told which one to use through a
++  // member pointer such as &Elem::heap_data
++  crimson::IndIntruHeapData heap_data;
++  crimson::IndIntruHeapData heap_data_alt;
++
++  Elem(int _data) : data(_data) { }
++
++  // equality looks at data only; the heap index fields are ignored.
++  // Declared const so that const elements can be compared.
++  bool operator==(const Elem& other) const {
++    return data == other.data;
++  }
++
++  friend std::ostream& operator<<(std::ostream& out, const Elem& d) {
++    out << d.data;
++    return out;
++  }
++};
++
++
++// sorted low to high: the element with the smaller data comes first
++struct ElemCompare {
++  bool operator()(const Elem& lhs, const Elem& rhs) const {
++    const bool lhs_first = lhs.data < rhs.data;
++    return lhs_first;
++  }
++};
++
++
++// first all evens precede all odds, then they're sorted high to low;
++// the call operator is const, matching ElemCompare, so that it can be
++// used through a const heap as well
++struct ElemCompareAlt {
++  bool operator()(const Elem& d1, const Elem& d2) const {
++    const bool even1 = 0 == d1.data % 2;
++    const bool even2 = 0 == d2.data % 2;
++    if (even1 != even2) {
++      // different parity: the even one goes first
++      return even1;
++    }
++    // same parity: larger value goes first
++    return d1.data > d2.data;
++  }
++};
++
++
++// Fixture providing a binary heap that already holds seven elements;
++// the shared pointers are kept as members so that tests can refer to
++// individual elements.
++class HeapFixture1: public ::testing::Test {
++
++public:
++
++  crimson::IndIntruHeap<std::shared_ptr<Elem>,
++                        Elem,
++                        &Elem::heap_data,
++                        ElemCompare> heap;
++
++  std::shared_ptr<Elem> data1, data2, data3, data4, data5, data6, data7;
++
++  void SetUp() {
++    data1 = std::make_shared<Elem>(2);
++    data2 = std::make_shared<Elem>(99);
++    data3 = std::make_shared<Elem>(1);
++    data4 = std::make_shared<Elem>(-5);
++    data5 = std::make_shared<Elem>(12);
++    data6 = std::make_shared<Elem>(-12);
++    data7 = std::make_shared<Elem>(-7);
++
++    // pushed in the order data1 ... data7
++    for (const auto& elem : {data1, data2, data3, data4, data5, data6, data7}) {
++      heap.push(elem);
++    }
++  }
++
++  void TearDown() {
++    // nothing to do
++  }
++}; // class HeapFixture1
++
++// push seven values through shared_ptr and check that they are popped
++// in ascending order
++TEST(IndIntruHeap, shared_ptr) {
++  crimson::IndIntruHeap<std::shared_ptr<Elem>,
++                        Elem,
++                        &Elem::heap_data,
++                        ElemCompare> heap;
++
++  EXPECT_TRUE(heap.empty());
++
++  heap.push(std::make_shared<Elem>(2));
++
++  EXPECT_FALSE(heap.empty());
++
++  for (int value : {99, 1, -5, 12, -12, -7}) {
++    heap.push(std::make_shared<Elem>(value));
++  }
++
++  EXPECT_FALSE(heap.empty());
++
++  // everything but the largest value is checked and popped in the loop
++  for (int expected : {-12, -7, -5, 1, 2, 12}) {
++    EXPECT_EQ(expected, heap.top().data);
++    heap.pop();
++  }
++  EXPECT_EQ(99, heap.top().data);
++
++  EXPECT_FALSE(heap.empty());
++  heap.pop();
++  EXPECT_TRUE(heap.empty());
++}
++
++
++// same scenario as the shared_ptr test, with the heap holding
++// unique_ptr
++TEST(IndIntruHeap, unique_ptr) {
++  crimson::IndIntruHeap<std::unique_ptr<Elem>,
++                        Elem,
++                        &Elem::heap_data,
++                        ElemCompare> heap;
++
++  EXPECT_TRUE(heap.empty());
++
++  heap.push(std::unique_ptr<Elem>(new Elem(2)));
++
++  EXPECT_FALSE(heap.empty());
++
++  for (int value : {99, 1, -5, 12, -12, -7}) {
++    heap.push(std::unique_ptr<Elem>(new Elem(value)));
++  }
++
++  EXPECT_FALSE(heap.empty());
++
++  // everything but the largest value is checked and popped in the loop
++  for (int expected : {-12, -7, -5, 1, 2, 12}) {
++    EXPECT_EQ(expected, heap.top().data);
++    heap.pop();
++  }
++  EXPECT_EQ(99, heap.top().data);
++
++  EXPECT_FALSE(heap.empty());
++  heap.pop();
++  EXPECT_TRUE(heap.empty());
++}
++
++
++// same scenario as the smart pointer tests, with the heap holding raw
++// pointers; the test allocates the elements and frees them itself
++TEST(IndIntruHeap, regular_ptr) {
++  crimson::IndIntruHeap<Elem*, Elem, &Elem::heap_data, ElemCompare> heap;
++
++  // an element is freed only after it has left the heap, so the heap
++  // never holds a dangling pointer while pop() re-arranges its entries
++  // (NOTE(review): pop() is defined outside this test; deleting first,
++  // as was done before, is only safe if pop() never dereferences the
++  // entry being removed)
++  auto pop_and_free = [&heap]() {
++    Elem* victim = &heap.top();
++    heap.pop();
++    delete victim;
++  };
++
++  EXPECT_TRUE(heap.empty());
++
++  heap.push(new Elem(2));
++
++  EXPECT_FALSE(heap.empty());
++
++  for (int value : {99, 1, -5, 12, -12, -7}) {
++    heap.push(new Elem(value));
++  }
++
++  EXPECT_FALSE(heap.empty());
++
++  // everything but the largest value is checked and popped in the loop
++  for (int expected : {-12, -7, -5, 1, 2, 12}) {
++    EXPECT_EQ(expected, heap.top().data);
++    pop_and_free();
++  }
++  EXPECT_EQ(99, heap.top().data);
++
++  EXPECT_FALSE(heap.empty());
++  pop_and_free();
++  EXPECT_TRUE(heap.empty());
++}
++
++
++// ascending pop order must also hold for a heap instantiated with K = 3
++TEST(IndIntruHeap, K_3) {
++  crimson::IndIntruHeap<std::shared_ptr<Elem>,
++                        Elem,
++                        &Elem::heap_data,
++                        ElemCompare,
++                        3> heap;
++
++  EXPECT_TRUE(heap.empty());
++
++  heap.push(std::make_shared<Elem>(2));
++
++  EXPECT_FALSE(heap.empty());
++
++  for (int value : {99, 1, -5, 12, -12, -7}) {
++    heap.push(std::make_shared<Elem>(value));
++  }
++
++  EXPECT_FALSE(heap.empty());
++
++  // everything but the largest value is checked and popped in the loop
++  for (int expected : {-12, -7, -5, 1, 2, 12}) {
++    EXPECT_EQ(expected, heap.top().data);
++    heap.pop();
++  }
++  EXPECT_EQ(99, heap.top().data);
++
++  EXPECT_FALSE(heap.empty());
++  heap.pop();
++  EXPECT_TRUE(heap.empty());
++}
++
++
++// ascending pop order must also hold for a heap instantiated with K = 4
++TEST(IndIntruHeap, K_4) {
++  crimson::IndIntruHeap<std::shared_ptr<Elem>,
++                        Elem,
++                        &Elem::heap_data,
++                        ElemCompare,
++                        4> heap;
++
++  EXPECT_TRUE(heap.empty());
++
++  heap.push(std::make_shared<Elem>(2));
++
++  EXPECT_FALSE(heap.empty());
++
++  for (int value : {99, 1, -5, 12, -12, -7}) {
++    heap.push(std::make_shared<Elem>(value));
++  }
++
++  EXPECT_FALSE(heap.empty());
++
++  // everything but the largest value is checked and popped in the loop
++  for (int expected : {-12, -7, -5, 1, 2, 12}) {
++    EXPECT_EQ(expected, heap.top().data);
++    heap.pop();
++  }
++  EXPECT_EQ(99, heap.top().data);
++
++  EXPECT_FALSE(heap.empty());
++  heap.pop();
++  EXPECT_TRUE(heap.empty());
++}
++
++
++// ascending pop order must also hold for a heap instantiated with
++// K = 10
++TEST(IndIntruHeap, K_10) {
++  crimson::IndIntruHeap<std::shared_ptr<Elem>,
++                        Elem,
++                        &Elem::heap_data,
++                        ElemCompare,
++                        10> heap;
++
++  EXPECT_TRUE(heap.empty());
++
++  heap.push(std::make_shared<Elem>(2));
++
++  EXPECT_FALSE(heap.empty());
++
++  for (int value : {99, 1, -5, 12, -12, -7}) {
++    heap.push(std::make_shared<Elem>(value));
++  }
++
++  EXPECT_FALSE(heap.empty());
++
++  // everything but the largest value is checked and popped in the loop
++  for (int expected : {-12, -7, -5, 1, 2, 12}) {
++    EXPECT_EQ(expected, heap.top().data);
++    heap.pop();
++  }
++  EXPECT_EQ(99, heap.top().data);
++
++  EXPECT_FALSE(heap.empty());
++  heap.pop();
++  EXPECT_TRUE(heap.empty());
++}
++
++
++// feeds the same random values to heaps with K = 2, 3, 4 and 10 and
++// checks that all four hand them back in the same non-decreasing
++// order; heap2 serves as the reference
++// NOTE(review): std::srand/std::rand/std::time/std::numeric_limits
++// are used although this file does not include <cstdlib>, <ctime> or
++// <limits> itself
++TEST(IndIntruHeap, multi_K) {
++  crimson::IndIntruHeap<std::shared_ptr<Elem>,
++                        Elem,
++                        &Elem::heap_data,
++                        ElemCompare,
++                        2> heap2;
++
++  crimson::IndIntruHeap<std::shared_ptr<Elem>,
++                        Elem,
++                        &Elem::heap_data,
++                        ElemCompare,
++                        3> heap3;
++
++  crimson::IndIntruHeap<std::shared_ptr<Elem>,
++                        Elem,
++                        &Elem::heap_data,
++                        ElemCompare,
++                        4> heap4;
++
++  crimson::IndIntruHeap<std::shared_ptr<Elem>,
++                        Elem,
++                        &Elem::heap_data,
++                        ElemCompare,
++                        10> heap10;
++
++  // 250 should give us at least 4 levels on all heaps
++  constexpr size_t count = 250;
++
++  std::srand(std::time(0)); // use current time as seed for random generator
++
++  // insert same set of random values into the four heaps; note that
++  // all four heaps write their slot index into the same
++  // Elem::heap_data field of the shared element, so that field is not
++  // meaningful here -- only push/top/pop are exercised
++  for (size_t i = 0; i < count; ++i) {
++    int value = std::rand() % 201 - 100; // -100...+100
++    auto data = std::make_shared<Elem>(value);
++    heap2.push(data);
++    heap3.push(data);
++    heap4.push(data);
++    heap10.push(data);
++  }
++
++  auto bound = std::numeric_limits<decltype(Elem::data)>::min();
++
++  for (size_t i = 0; i < count; ++i) {
++    auto current = heap2.top().data;
++
++    // the messages name heap2, the heap that "current" is read from
++    EXPECT_GE(current, bound) <<
++      "we should never go down, only increase or remain the same";
++    EXPECT_EQ(current, heap3.top().data) <<
++      "heap2's data and heap3's data should match";
++    EXPECT_EQ(current, heap4.top().data) <<
++      "heap2's data and heap4's data should match";
++    EXPECT_EQ(current, heap10.top().data) <<
++      "heap2's data and heap10's data should match";
++
++    heap2.pop();
++    heap3.pop();
++    heap4.pop();
++    heap10.pop();
++
++    bound = current;
++  }
++
++  EXPECT_TRUE(heap2.empty()) << "should be empty after all elements popped";
++  EXPECT_TRUE(heap3.empty()) << "should be empty after all elements popped";
++  EXPECT_TRUE(heap4.empty()) << "should be empty after all elements popped";
++  EXPECT_TRUE(heap10.empty()) << "should be empty after all elements popped";
++}
++
++
++// raising the key of the top element and demoting it must move it
++// down to where its new value belongs
++TEST(IndIntruHeap, demote) {
++  crimson::IndIntruHeap<std::unique_ptr<Elem>,
++                        Elem,
++                        &Elem::heap_data,
++                        ElemCompare> heap;
++
++  for (int value : {2, 99, 1, -5, 12, -12, -7}) {
++    heap.push(std::unique_ptr<Elem>(new Elem(value)));
++  }
++
++  // the top element (-12) becomes 24
++  heap.top().data = 24;
++
++  heap.demote(heap.top());
++
++  EXPECT_EQ(-7, heap.top().data);
++
++  // five pops remove -7, -5, 1, 2 and 12
++  for (int popped = 0; popped < 5; ++popped) {
++    heap.pop();
++  }
++
++  EXPECT_EQ(24, heap.top().data);
++}
++
++
++// demoting a top element whose key got even smaller must leave it at
++// the top
++TEST(IndIntruHeap, demote_not) {
++  crimson::IndIntruHeap<std::unique_ptr<Elem>,
++                        Elem,
++                        &Elem::heap_data,
++                        ElemCompare> heap;
++
++  for (int value : {2, 99, 1, -5, 12, -12, -7}) {
++    heap.push(std::unique_ptr<Elem>(new Elem(value)));
++  }
++
++  // the top element (-12) becomes -99
++  heap.top().data = -99;
++
++  heap.demote(heap.top());
++
++  EXPECT_EQ(-99, heap.top().data);
++
++  heap.pop();
++
++  EXPECT_EQ(-7, heap.top().data);
++}
++
++
++// re-keys one tracked element several times, calling promote after
++// lowering its value and demote after raising it
++TEST(IndIntruHeap, promote_and_demote) {
++  crimson::IndIntruHeap<std::shared_ptr<Elem>,
++                        Elem,
++                        &Elem::heap_data,
++                        ElemCompare> heap;
++
++  auto data1 = std::make_shared<Elem>(1);
++
++  heap.push(std::make_shared<Elem>(2));
++  heap.push(std::make_shared<Elem>(99));
++  heap.push(data1);
++  for (int value : {-5, 12, -12, -7}) {
++    heap.push(std::make_shared<Elem>(value));
++  }
++
++  EXPECT_EQ(-12, heap.top().data);
++
++  data1->data = -99;
++  heap.promote(*data1);
++
++  EXPECT_EQ(-99, heap.top().data);
++
++  data1->data = 999;
++  heap.demote(*data1);
++
++  EXPECT_EQ(-12, heap.top().data);
++
++  data1->data = 9;
++  heap.promote(*data1);
++
++  // four pops remove -12, -7, -5 and 2
++  for (int popped = 0; popped < 4; ++popped) {
++    heap.pop();
++  }
++
++  EXPECT_EQ(9, heap.top().data);
++}
++
++
++TEST(IndIntruHeap, adjust) {
++  // adjust() must relocate a re-keyed element whichever way its key
++  // moved, without the caller naming the direction
++  using Heap = crimson::IndIntruHeap<std::shared_ptr<Elem>, Elem,
++				     &Elem::heap_data, ElemCompare>;
++  Heap heap;
++
++  // keep a handle on the element holding 1; it is pushed third
++  auto tracked = std::make_shared<Elem>(1);
++  Elem& elem = *tracked;
++
++  heap.push(std::make_shared<Elem>(2));
++  heap.push(std::make_shared<Elem>(99));
++  heap.push(tracked);
++  heap.push(std::make_shared<Elem>(-5));
++  heap.push(std::make_shared<Elem>(12));
++  heap.push(std::make_shared<Elem>(-12));
++  heap.push(std::make_shared<Elem>(-7));
++
++  // heap.display_sorted(std::cout);
++
++  EXPECT_EQ(-12, heap.top().data);
++
++  elem.data = 999; // largest key: the top is unaffected
++  heap.adjust(elem);
++  EXPECT_EQ(-12, heap.top().data);
++
++  elem.data = -99; // smallest key: becomes the top
++  heap.adjust(elem);
++  EXPECT_EQ(-99, heap.top().data);
++
++  elem.data = 9; // mid-range key: -12 is the top again
++  heap.adjust(elem);
++  EXPECT_EQ(-12, heap.top().data);
++
++  // remove -12, -7, -5 and 2, in that order
++  for (int popped = 0; popped < 4; ++popped) {
++    heap.pop();
++  }
++
++  EXPECT_EQ(9, heap.top().data);
++}
++
++
++TEST(IndIntruHeap, remove_careful) {
++  // here we test for a common mistake in implementing remove: once an
++  // item is removed and the last element of the heap is moved into its
++  // position, that element may have to travel up as well as down, so
++  // it must be sifted in either direction rather than only sifted down.
++
++  crimson::IndIntruHeap<std::shared_ptr<Elem>,
++			Elem,
++			&Elem::heap_data,
++			ElemCompare,
++			2> heap;
++  // NOTE(review): the trailing 2 is presumably the heap's branching factor
++  heap.push(std::make_shared<Elem>(0));
++  heap.push(std::make_shared<Elem>(10));
++  heap.push(std::make_shared<Elem>(100));
++  heap.push(std::make_shared<Elem>(20));
++  heap.push(std::make_shared<Elem>(30));
++  heap.push(std::make_shared<Elem>(200));
++  heap.push(std::make_shared<Elem>(300));
++  heap.push(std::make_shared<Elem>(40));
++  // fatal check: remove() must never be handed the result of a failed lookup
++  auto k = heap.find(Elem(200));
++  ASSERT_NE(heap.end(), k) <<
++    "we should have found an element with the value 200, which we'll remove";
++  heap.remove(k);
++  // walk the heap in iteration order; 40 must have moved above 100
++  auto i = heap.cbegin();
++  EXPECT_EQ(0, i->data);
++  ++i;
++  EXPECT_EQ(10, i->data);
++  ++i;
++  EXPECT_EQ(40, i->data) <<
++    "this needs to be 40 or there's a mistake in implementation";
++  ++i;
++  EXPECT_EQ(20, i->data);
++  ++i;
++  EXPECT_EQ(30, i->data);
++  ++i;
++  EXPECT_EQ(100, i->data) <<
++    "this needs to be 100 or there's a mistake in implementation";
++}
++
++
++TEST_F(HeapFixture1, shared_data) {
++  // heap and data1..data7 are supplied by HeapFixture1 (defined outside this view)
++  crimson::IndIntruHeap<std::shared_ptr<Elem>,Elem,&Elem::heap_data_alt,ElemCompareAlt> heap2;
++  // second heap over data1..data7, using Elem::heap_data_alt and ElemCompareAlt
++  heap2.push(data1);
++  heap2.push(data2);
++  heap2.push(data3);
++  heap2.push(data4);
++  heap2.push(data5);
++  heap2.push(data6);
++  heap2.push(data7);
++  // re-key one element, then let each heap reposition it independently
++  data3->data = 32;
++  heap.adjust(*data3);
++  heap2.adjust(*data3);
++  // heap: the expectations require ascending order, with 32 between 12 and 99
++  EXPECT_EQ(-12, heap.top().data);
++  heap.pop();
++  EXPECT_EQ(-7, heap.top().data);
++  heap.pop();
++  EXPECT_EQ(-5, heap.top().data);
++  heap.pop();
++  EXPECT_EQ(2, heap.top().data);
++  heap.pop();
++  EXPECT_EQ(12, heap.top().data);
++  heap.pop();
++  EXPECT_EQ(32, heap.top().data);
++  heap.pop();
++  EXPECT_EQ(99, heap.top().data);
++  // heap2: the order is whatever ElemCompareAlt dictates (comparator not visible here)
++  EXPECT_EQ(32, heap2.top().data);
++  heap2.pop();
++  EXPECT_EQ(12, heap2.top().data);
++  heap2.pop();
++  EXPECT_EQ(2, heap2.top().data);
++  heap2.pop();
++  EXPECT_EQ(-12, heap2.top().data);
++  heap2.pop();
++  EXPECT_EQ(99, heap2.top().data);
++  heap2.pop();
++  EXPECT_EQ(-5, heap2.top().data);
++  heap2.pop();
++  EXPECT_EQ(-7, heap2.top().data);
++}
++
++
++TEST_F(HeapFixture1, iterator_basics) {
++  { // a begin()..end() walk must visit all seven fixture elements
++    unsigned count = 0;
++    for(auto i = heap.begin(); i != heap.end(); ++i) {
++      ++count;
++    }
++
++    EXPECT_EQ(7u, count) << "count should be 7";
++  }
++  // the first element must be reachable via ->, via * and via a bound reference
++  auto i1 = heap.begin();
++
++  EXPECT_EQ(-12, i1->data) <<
++    "first member with -> operator must be smallest";
++
++  EXPECT_EQ(-12, (*i1).data) <<
++    "first member with * operator must be smallest";
++
++  Elem& e1 = *i1;
++  EXPECT_EQ(-12, e1.data) <<
++    "first member through a reference must be smallest";
++  // each original value must be seen once: it is erased from the set when visited
++  {
++    std::set<int> values;
++    values.insert(2);
++    values.insert(99);
++    values.insert(1);
++    values.insert(-5);
++    values.insert(12);
++    values.insert(-12);
++    values.insert(-7);
++
++    for(auto i = heap.begin(); i != heap.end(); ++i) {
++      const auto& v = *i; // bind, rather than copy, the visited element
++      EXPECT_NE(values.end(), values.find(v.data)) <<
++	"value in heap must be part of original set";
++      values.erase(v.data);
++    }
++    EXPECT_EQ(0u, values.size()) << "all values must have been seen";
++  }
++}
++
++
++TEST_F(HeapFixture1, const_iterator_basics) {
++  const auto& cheap = heap;
++  // everything below goes through the const view so the const overloads are used
++  {
++    unsigned count = 0;
++    for(auto i = cheap.cbegin(); i != cheap.cend(); ++i) {
++      ++count;
++    }
++
++    EXPECT_EQ(7u, count) << "count should be 7";
++  }
++  // the first element must be reachable via ->, via * and via a bound reference
++  auto i1 = cheap.cbegin();
++
++  EXPECT_EQ(-12, i1->data) <<
++    "first member with -> operator must be smallest";
++
++  EXPECT_EQ(-12, (*i1).data) <<
++    "first member with * operator must be smallest";
++
++  const Elem& e1 = *i1;
++  EXPECT_EQ(-12, e1.data) <<
++    "first member through a reference must be smallest";
++  // each original value must be seen once: it is erased from the set when visited
++  {
++    std::set<int> values;
++    values.insert(2);
++    values.insert(99);
++    values.insert(1);
++    values.insert(-5);
++    values.insert(12);
++    values.insert(-12);
++    values.insert(-7);
++
++    for(auto i = cheap.cbegin(); i != cheap.cend(); ++i) {
++      const auto& v = *i; // bind, rather than copy, the visited element
++      EXPECT_NE(values.end(), values.find(v.data)) <<
++	"value in heap must be part of original set";
++      values.erase(v.data);
++    }
++    EXPECT_EQ(0u, values.size()) << "all values must have been seen";
++  }
++}
++
++
++TEST_F(HeapFixture1, iterator_find_rfind) {
++  { // find by indirection; the found-check is fatal because it1 is dereferenced next
++    auto it1 = heap.find(data7);
++    ASSERT_NE(heap.end(), it1) <<
++      "find by indirection for included element should succeed";
++    EXPECT_EQ(-7, it1->data) <<
++      "find by indirection for included element should result in right value";
++    // an equal-valued element that was never pushed must not match
++    auto fake_data = std::make_shared<Elem>(-7);
++    auto it2 = heap.find(fake_data);
++    EXPECT_EQ(heap.end(), it2) <<
++      "find by indirection for not included element should fail";
++  }
++
++  { // find by value
++    auto it1 = heap.find(Elem(-7));
++    ASSERT_NE(heap.end(), it1) <<
++      "find by value for included element should succeed";
++    EXPECT_EQ(-7, it1->data) <<
++      "find by value for included element should result in right value";
++
++    auto it2 = heap.find(Elem(7));
++    EXPECT_EQ(heap.end(), it2) <<
++      "find by value for not included element should fail";
++  }
++
++  { // reverse find by indirection
++    auto it1 = heap.rfind(data7);
++    ASSERT_NE(heap.end(), it1) <<
++      "reverse find by indirection for included element should succeed";
++    EXPECT_EQ(-7, it1->data) <<
++      "reverse find by indirection for included element should result "
++      "in right value";
++    // an equal-valued element that was never pushed must not match
++    auto fake_data = std::make_shared<Elem>(-7);
++    auto it2 = heap.rfind(fake_data);
++    EXPECT_EQ(heap.end(), it2) <<
++      "reverse find by indirection for not included element should fail";
++  }
++
++  { // reverse find by value
++    auto it1 = heap.rfind(Elem(-7));
++    ASSERT_NE(heap.end(), it1) <<
++      "reverse find by value for included element should succeed";
++    EXPECT_EQ(-7, it1->data) <<
++      "reverse find by value for included element should result "
++      "in right value";
++
++    auto it2 = heap.rfind(Elem(7));
++    EXPECT_EQ(heap.end(), it2) <<
++      "reverse find by value for not included element should fail";
++  }
++}
++
++
++TEST_F(HeapFixture1, const_iterator_find_rfind) {
++  const auto& c_heap = heap;
++  // found-checks are fatal: each found iterator is dereferenced right afterwards
++  {
++    auto it1 = c_heap.find(data7);
++    ASSERT_NE(c_heap.cend(), it1) <<
++      "find by indirection for included element should succeed";
++    EXPECT_EQ(-7, it1->data) <<
++      "find by indirection for included element should result in right value";
++    // an equal-valued element that was never pushed must not match
++    auto fake_data = std::make_shared<Elem>(-7);
++    auto it2 = c_heap.find(fake_data);
++    EXPECT_EQ(c_heap.cend(), it2) <<
++      "find by indirection for not included element should fail";
++  }
++
++  { // find by value
++    auto it1 = c_heap.find(Elem(-7));
++    ASSERT_NE(c_heap.cend(), it1) <<
++      "find by value for included element should succeed";
++    EXPECT_EQ(-7, it1->data) <<
++      "find by value for included element should result in right value";
++
++    auto it2 = c_heap.find(Elem(7));
++    EXPECT_EQ(c_heap.cend(), it2) <<
++      "find by value for not included element should fail";
++  }
++
++  { // reverse find by indirection
++    auto it1 = c_heap.rfind(data7);
++    ASSERT_NE(c_heap.cend(), it1) <<
++      "reverse find by indirection for included element should succeed";
++    EXPECT_EQ(-7, it1->data) <<
++      "reverse find by indirection for included element should result "
++      "in right value";
++    // an equal-valued element that was never pushed must not match
++    auto fake_data = std::make_shared<Elem>(-7);
++    auto it2 = c_heap.rfind(fake_data);
++    EXPECT_EQ(c_heap.cend(), it2) <<
++      "reverse find by indirection for not included element should fail";
++  }
++
++  { // reverse find by value
++    auto it1 = c_heap.rfind(Elem(-7));
++    ASSERT_NE(c_heap.cend(), it1) <<
++      "reverse find by value for included element should succeed";
++    EXPECT_EQ(-7, it1->data) <<
++      "reverse find by value for included element should result "
++      "in right value";
++
++    auto it2 = c_heap.rfind(Elem(7));
++    EXPECT_EQ(c_heap.cend(), it2) <<
++      "reverse find by value for not included element should fail";
++  }
++}
++
++
++TEST_F(HeapFixture1, iterator_remove) {
++  auto it1 = heap.find(data7);
++  ASSERT_NE(heap.end(), it1) << "find for included element should succeed";
++  // the check above is fatal so that remove() is never handed a failed lookup
++  heap.remove(it1);
++
++  auto it2 = heap.find(data7);
++  EXPECT_EQ(heap.end(), it2) << "find for removed element should fail";
++
++  for (auto it3 = heap.begin(); it3 != heap.end(); ++it3) {
++    EXPECT_NE(-7, it3->data) <<
++      "iterating through heap should not find removed value";
++  }
++
++  // move through heap without -7
++  EXPECT_EQ(-12, heap.top().data);
++  heap.pop();
++  EXPECT_EQ(-5, heap.top().data);
++  heap.pop();
++  EXPECT_EQ(1, heap.top().data);
++  heap.pop();
++  EXPECT_EQ(2, heap.top().data);
++  heap.pop();
++  EXPECT_EQ(12, heap.top().data);
++  heap.pop();
++  EXPECT_EQ(99, heap.top().data);
++  heap.pop();
++}
++
++
++TEST_F(HeapFixture1, four_tops) {
++  Elem& top1 = heap.top();
++  EXPECT_EQ(-12, top1.data);
++  // (despite the test's name, six accessor/binding combinations are covered)
++  const Elem& top2 = heap.top();
++  EXPECT_EQ(-12, top2.data);
++  // top_ind() yields something assignable to std::shared_ptr<Elem>
++  std::shared_ptr<Elem> top3 = heap.top_ind();
++  EXPECT_EQ(-12, top3->data);
++
++  const std::shared_ptr<Elem> top4 = heap.top_ind();
++  EXPECT_EQ(-12, top4->data);
++  // repeat both accessors through a const reference to the heap
++  const auto& c_heap = heap;
++
++  const Elem& top5 = c_heap.top();
++  EXPECT_EQ(-12, top5.data);
++
++  const std::shared_ptr<Elem> top6 = c_heap.top_ind();
++  EXPECT_EQ(-12, top6->data);
++}
++
++
++TEST_F(HeapFixture1, display_sorted) {
++  // render the heap into an in-memory stream instead of stdout
++  std::stringstream rendered;
++  heap.display_sorted(rendered);
++
++  const std::string s = rendered.str();
++  EXPECT_GT(s.length(), 0u);
++
++  // both values have to show up somewhere in the rendering ...
++  const auto negseven = s.find("-7");
++  EXPECT_NE(negseven, std::string::npos);
++
++  const auto ninetynine = s.find("99");
++  EXPECT_NE(ninetynine, std::string::npos);
++
++  // ... and the index of -7 should be less than the index of 99
++  EXPECT_LT(negseven, ninetynine);
++
++#if 0
++  std::cout << s << std::endl;
++#endif
++}
diff --cc src/dmclock/support/test/test_intrusive_heap.cc
index 00000000000,00000000000..a0ad07524e0
new file mode 100644
--- /dev/null
+++ b/src/dmclock/support/test/test_intrusive_heap.cc
@@@ -1,0 -1,0 +1,86 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#include <string>
++#include <iostream>
++
++#include "intrusive_heap.h"
++
++
++struct TestCompare;
++struct TestIntruData;
++
++
++class Test1 {
++    friend TestCompare;
++    friend TestIntruData;
++    // payload, plus the IntruHeapData member that TestIntruData hands out
++    int data;
++    crimson::IntruHeapData heap_data;
++
++public:
++    explicit Test1(int _data) : data(_data) {}
++    // prints the payload followed by heap_data in parentheses
++    friend std::ostream& operator<<(std::ostream& out, const Test1& d) {
++        out << d.data << " (" << d.heap_data << ")";
++        return out;
++    }
++    // mutable access to the payload
++    int& the_data() { return data; }
++};
++
++
++struct TestCompare { // strict less-than on Test1::data; stateless, so callable when const
++    bool operator()(const Test1& d1, const Test1& d2) const {
++        return d1.data < d2.data;
++    }
++};
++
++
++struct TestIntruData { // returns the IntruHeapData member embedded in a Test1
++    crimson::IntruHeapData& operator()(Test1& d) const {
++        return d.heap_data;
++    }
++};
++
++
++int main(int argc, char** argv) {
++    Test1 d1(2);
++    Test1 d2(3);
++    Test1 d3(1);
++    Test1 d4(-5);
++    // heap of Test1, located via TestIntruData and ordered by TestCompare
++    crimson::IntruHeap<Test1, TestIntruData, TestCompare> my_heap;
++    // push a mix of named objects and temporaries
++    my_heap.push(d1);
++    my_heap.push(d2);
++    my_heap.push(d3);
++    my_heap.push(d4);
++    my_heap.push(Test1(-9));
++    my_heap.push(Test1(99));
++    my_heap.push(Test1(0));
++    // dump the heap as first built
++    std::cout << my_heap << std::endl;
++    // overwrite the top element's payload with 17, then have the heap re-place it
++    auto& t = my_heap.top();
++    t.the_data() = 17;
++    my_heap.adjust_down(t);
++
++    std::cout << my_heap << std::endl;
++
++    my_heap.display_sorted(std::cout);
++    // drain: print each top element, then the heap that remains after popping it
++    while (!my_heap.empty()) {
++        auto& top = my_heap.top();
++        std::cout << top << std::endl;
++        my_heap.pop();
++        std::cout << my_heap << std::endl;
++    }
++
++    return 0;
++}
diff --cc src/dmclock/test/CMakeLists.txt
index 00000000000,00000000000..e72810b56aa
new file mode 100644
--- /dev/null
+++ b/src/dmclock/test/CMakeLists.txt
@@@ -1,0 -1,0 +1,35 @@@
++# Builds the dmclock-tests gtest executable and registers it with CTest.
++# Header search path: dmclock core, support code, simulator, then Boost.
++include_directories(../src ../support/src ../sim/src)
++include_directories(${Boost_INCLUDE_DIRS})
++
++set(support_srcs ../sim/src/test_dmclock.cc)
++set(test_srcs
++  test_test_client.cc
++  test_dmclock_server.cc
++  test_dmclock_client.cc
++  )
++# NOTE(review): core_srcs and local_flags are not set in this file -- confirm they are inherited
++set_source_files_properties(${core_srcs} ${test_srcs}
++  PROPERTIES
++  COMPILE_FLAGS "${local_flags}"
++  )
++
++add_executable(dmclock-tests ${test_srcs} ${support_srcs})
++# link the in-tree gtest targets when they exist, else the libraries found by find_package
++if (TARGET gtest AND TARGET gtest_main)
++  add_dependencies(dmclock-tests gtest gtest_main)
++  target_link_libraries(dmclock-tests
++    LINK_PRIVATE $<TARGET_FILE:dmclock>
++    pthread
++    $<TARGET_FILE:gtest>
++    $<TARGET_FILE:gtest_main>)
++else()
++  target_link_libraries(dmclock-tests
++    LINK_PRIVATE $<TARGET_FILE:dmclock> pthread ${GTEST_LIBRARY} ${GTEST_MAIN_LIBRARY})
++endif()
++
++add_dependencies(dmclock-tests dmclock)
++
++add_test(NAME dmclock-tests
++  COMMAND $<TARGET_FILE:dmclock-tests>)
diff --cc src/dmclock/test/test_dmclock_client.cc
index 00000000000,00000000000..ee4172dc348
new file mode 100644
--- /dev/null
+++ b/src/dmclock/test/test_dmclock_client.cc
@@@ -1,0 -1,0 +1,219 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#include <chrono>
++#include <mutex>
++#include <functional>
++#include <iostream>
++
++
++#include "dmclock_client.h"
++#include "dmclock_util.h"
++#include "gtest/gtest.h"
++
++
++namespace dmc = crimson::dmclock;
++
++
++namespace crimson {
++  namespace dmclock {
++
++    /*
++     * Runs the supplied code while holding the supplied mutex.
++     */
++    static void test_locked(std::mutex& mtx, std::function<void()> code) {
++      const std::lock_guard<std::mutex> held(mtx);
++      code();
++    }
++
++
++    TEST(dmclock_client, server_erase) {
++      using ServerId = int;
++      // using ClientId = int;
++
++      ServerId server = 101;
++      // ClientId client = 3;
++
++      // dmc::PhaseType resp_params = dmc::PhaseType::reservation;
++      // NOTE(review): 2s looks like the clean interval and 3s the erase age -- confirm
++      dmc::ServiceTracker<ServerId> st(std::chrono::seconds(2),
++                                       std::chrono::seconds(3));
++
++      auto lock_st = [&](std::function<void()> code) {
++	test_locked(st.data_mtx, code);
++      };
++
++      /* The timeline should be as follows:
++       *
++       *     0 seconds : tracker created
++       *
++       *     1 seconds : request created; map is size 1
++       *
++       * 2 seconds : clean notes first mark; +2 is base for further calcs
++       *
++       * 4 seconds : clean does nothing except makes another mark
++       *
++       *   5 seconds : when we're scheduled to erase (+2 + 3)
++       *
++       *     5 seconds : since the clean job hasn't run yet, map still size 1
++       *
++       * 6 seconds : clean erases server
++       *
++       *     7 seconds : verified server is gone (map size 0)
++       */
++
++      lock_st([&] () {
++	  EXPECT_EQ(0u, st.server_map.size()) <<
++	    "server map initially has size 0";
++	});
++
++      std::this_thread::sleep_for(std::chrono::seconds(1));
++
++      // call for side effects: this first request is what adds the server
++      (void) st.get_req_params(server);
++
++      lock_st([&] () {
++	  EXPECT_EQ(1u, st.server_map.size()) <<
++	    "server map has size 1 after first request";
++	});
++      // 1s + 4s = 5s on the timeline above
++      std::this_thread::sleep_for(std::chrono::seconds(4));
++
++      lock_st([&] () {
++	  EXPECT_EQ(1u, st.server_map.size()) <<
++	    "server map has size 1 just before erase";
++	});
++      // 5s + 2s = 7s on the timeline above
++      std::this_thread::sleep_for(std::chrono::seconds(2));
++
++      lock_st([&] () {
++	  EXPECT_EQ(0u, st.server_map.size()) <<
++	    "server map has size 0 just after erase";
++	});
++    } // TEST
++
++
++    TEST(dmclock_client, delta_rho_values) {
++      using ServerId = int;
++      // using ClientId = int;
++      // checks the delta/rho pair from get_req_params as responses get tracked
++      ServerId server1 = 101;
++      ServerId server2 = 7;
++      // ClientId client = 3;
++
++      // RespParams<ServerId> resp_params(server, dmc::PhaseType::reservation);
++
++      dmc::ServiceTracker<ServerId> st(std::chrono::seconds(2),
++                                       std::chrono::seconds(3));
++
++      auto rp1 = st.get_req_params(server1);
++
++      EXPECT_EQ(1u, rp1.delta) <<
++	"delta should be 1 with no intervening responses by " <<
++	"other servers";
++      EXPECT_EQ(1u, rp1.rho) <<
++	"rho should be 1 with no intervening reservation responses by " <<
++	"other servers";
++
++      auto rp2 = st.get_req_params(server1);
++
++      EXPECT_EQ(1u, rp2.delta) <<
++	"delta should be 1 with no intervening responses by " <<
++	"other servers";
++      EXPECT_EQ(1u, rp2.rho) <<
++	"rho should be 1 with no intervening reservation responses by " <<
++	"other servers";
++      // a response from server1 itself must not count as intervening
++      st.track_resp(server1, dmc::PhaseType::priority);
++
++      auto rp3 = st.get_req_params(server1);
++
++      EXPECT_EQ(1u, rp3.delta) <<
++	"delta should be 1 with no intervening responses by " <<
++	"other servers";
++      EXPECT_EQ(1u, rp3.rho) <<
++	"rho should be 1 with no intervening reservation responses by " <<
++	"other servers";
++      // a priority response from another server raises delta only
++      st.track_resp(server2, dmc::PhaseType::priority);
++
++      auto rp4 = st.get_req_params(server1);
++
++      EXPECT_EQ(2u, rp4.delta) <<
++	"delta should be 2 with one intervening priority response by " <<
++	"another server";
++      EXPECT_EQ(1u, rp4.rho) <<
++	"rho should be 1 with one intervening priority responses by " <<
++	"another server";
++
++      auto rp5 = st.get_req_params(server1);
++
++      EXPECT_EQ(1u, rp5.delta) <<
++	"delta should be 1 with no intervening responses by " <<
++	"other servers";
++      EXPECT_EQ(1u, rp5.rho) <<
++	"rho should be 1 with no intervening reservation responses by " <<
++	"other servers";
++      // a reservation response from another server raises both delta and rho
++      st.track_resp(server2, dmc::PhaseType::reservation);
++
++      auto rp6 = st.get_req_params(server1);
++
++      EXPECT_EQ(2u, rp6.delta) <<
++	"delta should be 2 with one intervening reservation response by " <<
++	"another server";
++      EXPECT_EQ(2u, rp6.rho) <<
++	"rho should be 2 with one intervening reservation responses by " <<
++	"another server";
++
++      // auto rp6_b = st.get_req_params(server2);
++      // server2 answers four times (two reservation), server1 three times (one reservation)
++      st.track_resp(server2, dmc::PhaseType::reservation);
++      st.track_resp(server1, dmc::PhaseType::priority);
++      st.track_resp(server2, dmc::PhaseType::priority);
++      st.track_resp(server2, dmc::PhaseType::reservation);
++      st.track_resp(server1, dmc::PhaseType::reservation);
++      st.track_resp(server1, dmc::PhaseType::priority);
++      st.track_resp(server2, dmc::PhaseType::priority);
++
++      auto rp7 = st.get_req_params(server1);
++
++      EXPECT_EQ(5u, rp7.delta) <<
++	"delta should be 5 with four intervening responses by " <<
++	"another server";
++      EXPECT_EQ(3u, rp7.rho) <<
++	"rho should be 3 with two intervening reservation responses by " <<
++	"another server";
++
++      auto rp7b = st.get_req_params(server2);
++
++      EXPECT_EQ(4u, rp7b.delta) <<
++	"delta should be 4 with three intervening responses by " <<
++	"another server";
++      EXPECT_EQ(2u, rp7b.rho) <<
++	"rho should be 2 with one intervening reservation responses by " <<
++	"another server";
++
++      auto rp8 = st.get_req_params(server1);
++
++      EXPECT_EQ(1u, rp8.delta) <<
++	"delta should be 1 with no intervening responses by " <<
++	"another server";
++      EXPECT_EQ(1u, rp8.rho) <<
++	"rho should be 1 with no intervening reservation responses by " <<
++	"another server";
++
++      auto rp8b = st.get_req_params(server2);
++      EXPECT_EQ(1u, rp8b.delta) <<
++	"delta should be 1 with no intervening responses by " <<
++	"another server";
++      EXPECT_EQ(1u, rp8b.rho) <<
++	"rho should be 1 with no intervening reservation responses by " <<
++	"another server";
++    } // TEST
++  } // namespace dmclock
++} // namespace crimson
diff --cc src/dmclock/test/test_dmclock_server.cc
index 00000000000,00000000000..4555e377323
new file mode 100644
--- /dev/null
+++ b/src/dmclock/test/test_dmclock_server.cc
@@@ -1,0 -1,0 +1,826 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++
++#include <memory>
++#include <chrono>
++#include <iostream>
++#include <list>
++#include <vector>
++
++
++#include "dmclock_server.h"
++#include "dmclock_util.h"
++#include "gtest/gtest.h"
++
++
++namespace dmc = crimson::dmclock;
++
++
++// placeholder request type for the queues under test; it carries no data
++struct Request {
++};
++
++
++namespace crimson {
++  namespace dmclock {
++
++    /*
++     * Invokes the given callable while the given mutex is held.
++     */
++    static void test_locked(std::mutex& mtx, std::function<void()> code) {
++      std::lock_guard<std::mutex> held(mtx);
++      code();
++    }
++
++
++    TEST(dmclock_server, bad_tag_deathtest) {
++      using ClientId = int;
++      using Queue = dmc::PullPriorityQueue<ClientId,Request>;
++      using QueueRef = std::unique_ptr<Queue>;
++      // adding a request for a client with zero reservation and weight must abort
++      ClientId client1 = 17;
++      ClientId client2 = 18;
++
++      double reservation = 0.0;
++      double weight = 0.0;
++      // the two infos differ only in the third argument (presumably the limit -- confirm)
++      dmc::ClientInfo ci1(reservation, weight, 0.0);
++      dmc::ClientInfo ci2(reservation, weight, 1.0);
++
++      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo {
++	if (client1 == c) return ci1;
++	else if (client2 == c) return ci2;
++	else {
++	  ADD_FAILURE() << "got request from neither of two clients";
++	  return ci1; // must return
++	}
++      };
++
++      QueueRef pq(new Queue(client_info_f, false));
++      Request req;
++      ReqParams req_params(1,1);
++      // the regex has to match the text of the assertion that fires
++      EXPECT_DEATH_IF_SUPPORTED(pq->add_request(req, client1, req_params),
++				"Assertion.*reservation.*max_tag.*"
++				"proportion.*max_tag") <<
++	"we should fail if a client tries to generate a reservation tag "
++	"where reservation and proportion are both 0";
++
++      // same expectation for the second client
++      EXPECT_DEATH_IF_SUPPORTED(pq->add_request(req, client2, req_params),
++				"Assertion.*reservation.*max_tag.*"
++				"proportion.*max_tag") <<
++	"we should fail if a client tries to generate a reservation tag "
++	"where reservation and proportion are both 0";
++    }
++
++
++    TEST(dmclock_server, client_idle_erase) {
++      using ClientId = int;
++      using Queue = dmc::PushPriorityQueue<ClientId,Request>;
++      int client = 17;
++      double reservation = 100.0;
++
++      dmc::ClientInfo ci(reservation, 1.0, 0.0);
++      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo { return ci; };
++      auto server_ready_f = [] () -> bool { return true; };
++      auto submit_req_f = [] (const ClientId& c,
++			      std::unique_ptr<Request> req,
++			      dmc::PhaseType phase) {
++	// empty; do nothing
++      };
++      // 3s/5s/2s: presumably idle age, erase age and clean period -- confirm
++      Queue pq(client_info_f,
++	       server_ready_f,
++	       submit_req_f,
++	       std::chrono::seconds(3),
++	       std::chrono::seconds(5),
++	       std::chrono::seconds(2),
++	       false);
++
++      auto lock_pq = [&](std::function<void()> code) {
++	test_locked(pq.data_mtx, code);
++      };
++
++
++      /* The timeline should be as follows:
++       *
++       *     0 seconds : request created
++       *
++       *     1 seconds : map is size 1, idle is false
++       *
++       * 2 seconds : clean notes first mark; +2 is base for further calcs
++       *
++       * 4 seconds : clean does nothing except makes another mark
++       *
++       *   5 seconds : when we're scheduled to idle (+2 + 3)
++       *
++       * 6 seconds : clean idles client
++       *
++       *   7 seconds : when we're scheduled to erase (+2 + 5)
++       *
++       *     7 seconds : verified client is idle
++       *
++       * 8 seconds : clean erases client info
++       *
++       *     9 seconds : verified client is erased
++       */
++
++      lock_pq([&] () {
++	  EXPECT_EQ(0u, pq.client_map.size()) <<
++	    "client map initially has size 0";
++	});
++
++      Request req;
++      dmc::ReqParams req_params(1, 1);
++      pq.add_request_time(req, client, req_params, dmc::get_time());
++
++      std::this_thread::sleep_for(std::chrono::seconds(1));
++
++      lock_pq([&] () {
++	  EXPECT_EQ(1u, pq.client_map.size()) <<
++	    "client map has 1 after 1 client";
++	  EXPECT_FALSE(pq.client_map.at(client)->idle) <<
++	    "initially client map entry shows not idle.";
++	});
++      // 1s + 6s = 7s on the timeline above
++      std::this_thread::sleep_for(std::chrono::seconds(6));
++
++      lock_pq([&] () {
++	  EXPECT_TRUE(pq.client_map.at(client)->idle) <<
++	    "after idle age client map entry shows idle.";
++	});
++      // 7s + 2s = 9s on the timeline above
++      std::this_thread::sleep_for(std::chrono::seconds(2));
++
++      lock_pq([&] () {
++	  EXPECT_EQ(0u, pq.client_map.size()) <<
++	    "client map loses its entry after erase age";
++	});
++    } // TEST
++
++
++#if 0
++    TEST(dmclock_server, reservation_timing) {
++      using ClientId = int;
++      // NB? PUSH OR PULL
++      using Queue = std::unique_ptr<dmc::PriorityQueue<ClientId,Request>>;
++      using std::chrono::steady_clock;
++      // NOTE(review): compiled out by the enclosing #if 0; check the queue API before re-enabling
++      int client = 17;
++
++      std::vector<dmc::Time> times;
++      std::mutex times_mtx;
++      using Guard = std::lock_guard<decltype(times_mtx)>;
++
++      // reservation every second
++      dmc::ClientInfo ci(1.0, 0.0, 0.0);
++      Queue pq;
++
++      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo { return ci; };
++      auto server_ready_f = [] () -> bool { return true; };
++      auto submit_req_f = [&] (const ClientId& c,
++			       std::unique_ptr<Request> req,
++			       dmc::PhaseType phase) {
++	{
++	  Guard g(times_mtx);
++	  times.emplace_back(dmc::get_time());
++	}
++	std::thread complete([&](){ pq->request_completed(); });
++	complete.detach();
++      };
++
++      // NB? PUSH OR PULL
++      pq = Queue(new dmc::PriorityQueue<ClientId,Request>(client_info_f,
++							  server_ready_f,
++							  submit_req_f,
++							  false));
++
++      Request req;
++      ReqParams<ClientId> req_params(client, 1, 1);
++
++      for (int i = 0; i < 5; ++i) {
++	pq->add_request_time(req, req_params, dmc::get_time());
++      }
++      // sleep before locking: submit_req_f needs times_mtx to record each time
++      std::this_thread::sleep_for(std::chrono::milliseconds(5500));
++      {
++	Guard g(times_mtx);
++	EXPECT_EQ(5u, times.size()) <<
++	  "after 5.5 seconds, we should have 5 requests times at 1 second apart";
++      }
++    } // TEST
++#endif
++
++
++    TEST(dmclock_server, remove_by_req_filter) {
++      // minimal request type: an integer id is all the filters look at
++      struct MyReq {
++	int id;
++	MyReq(int _id) : id(_id) {}
++      };
++
++      using ClientId = int;
++      using Queue = dmc::PullPriorityQueue<ClientId,MyReq>;
++
++      ClientId client1 = 17;
++      ClientId client2 = 98;
++
++      // every client is handed the same ClientInfo
++      dmc::ClientInfo info1(0.0, 1.0, 0.0);
++      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo {
++	return info1;
++      };
++
++      Queue pq(client_info_f, true);
++      EXPECT_EQ(0u, pq.client_count());
++      EXPECT_EQ(0u, pq.request_count());
++
++      ReqParams req_params(1,1);
++
++      // three requests for client1 and six for client2, interleaved
++      pq.add_request(MyReq(1), client1, req_params);
++      pq.add_request(MyReq(11), client1, req_params);
++      pq.add_request(MyReq(2), client2, req_params);
++      pq.add_request(MyReq(0), client2, req_params);
++      pq.add_request(MyReq(13), client2, req_params);
++      pq.add_request(MyReq(2), client2, req_params);
++      pq.add_request(MyReq(13), client2, req_params);
++      pq.add_request(MyReq(98), client2, req_params);
++      pq.add_request(MyReq(44), client1, req_params);
++
++      EXPECT_EQ(2u, pq.client_count());
++      EXPECT_EQ(9u, pq.request_count());
++
++      // first pass: drop the four odd ids (1, 11, 13, 13)
++      pq.remove_by_req_filter(
++	[] (const MyReq& r) -> bool { return 1 == r.id % 2; });
++      EXPECT_EQ(5u, pq.request_count());
++
++      // second pass: drop what is left (all even ids), keeping a copy of
++      // each request as the filter sees it
++      std::list<MyReq> capture;
++      pq.remove_by_req_filter(
++	[&capture] (const MyReq& r) -> bool {
++	  const bool even = (0 == r.id % 2);
++	  if (even) {
++	    capture.push_front(r);
++	  }
++	  return even;
++	},
++	true);
++
++      EXPECT_EQ(0u, pq.request_count());
++      EXPECT_EQ(5u, capture.size());
++
++      // 44 + 2 + 0 + 2 + 98
++      int total = 0;
++      for (const MyReq& kept : capture) {
++	total += kept.id;
++      }
++      EXPECT_EQ(146, total) << " sum of captured items should be 146";
++    } // TEST
++
++
++    TEST(dmclock_server, remove_by_req_filter_ordering_forwards_visit) {
++      struct MyReq {
++	int id;
++
++	MyReq(int _id) :
++	  id(_id)
++	{
++	  // empty
++	}
++      }; // MyReq
++
++      using ClientId = int;
++      using Queue = dmc::PullPriorityQueue<ClientId,MyReq>;
++
++      ClientId client1 = 17;
++
++      dmc::ClientInfo info1(0.0, 1.0, 0.0);
++
++      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo {
++	return info1;
++      };
++
++      Queue pq(client_info_f, true);
++
++      EXPECT_EQ(0u, pq.client_count());
++      EXPECT_EQ(0u, pq.request_count());
++
++      ReqParams req_params(1,1);
++      // a single client queues ids 1..6 in ascending order
++      pq.add_request(MyReq(1), client1, req_params);
++      pq.add_request(MyReq(2), client1, req_params);
++      pq.add_request(MyReq(3), client1, req_params);
++      pq.add_request(MyReq(4), client1, req_params);
++      pq.add_request(MyReq(5), client1, req_params);
++      pq.add_request(MyReq(6), client1, req_params);
++
++      EXPECT_EQ(1u, pq.client_count());
++      EXPECT_EQ(6u, pq.request_count());
++      // NOTE(review): the trailing bool looks like a visit-backwards flag (false here) -- confirm
++      // remove odd ids in forward order and append to end
++
++      std::vector<MyReq> capture;
++      pq.remove_by_req_filter(
++	[&capture] (const MyReq& r) -> bool {
++	  if (1 == r.id % 2) {
++	    capture.push_back(r);
++	    return true;
++	  } else {
++	    return false;
++	  }
++	},
++	false);
++
++      EXPECT_EQ(3u, pq.request_count());
++      EXPECT_EQ(3u, capture.size());
++      EXPECT_EQ(1, capture[0].id) << "items should come out in forward order";
++      EXPECT_EQ(3, capture[1].id) << "items should come out in forward order";
++      EXPECT_EQ(5, capture[2].id) << "items should come out in forward order";
++
++      // remove even ids, again visiting forwards, but insert each at the
++      // front so the captured sequence comes out reversed (6, 4, 2)
++
++      std::vector<MyReq> capture2;
++      pq.remove_by_req_filter(
++	[&capture2] (const MyReq& r) -> bool {
++	  if (0 == r.id % 2) {
++	    capture2.insert(capture2.begin(), r);
++	    return true;
++	  } else {
++	    return false;
++	  }
++	},
++	false);
++
++      EXPECT_EQ(0u, pq.request_count());
++      EXPECT_EQ(3u, capture2.size());
++      EXPECT_EQ(6, capture2[0].id) << "items should come out in reverse order";
++      EXPECT_EQ(4, capture2[1].id) << "items should come out in reverse order";
++      EXPECT_EQ(2, capture2[2].id) << "items should come out in reverse order";
++    } // TEST
++
++
++    TEST(dmclock_server, remove_by_req_filter_ordering_backwards_visit) {
++      struct MyReq {
++	int id;
++
++	MyReq(int _id) :
++	  id(_id)
++	{
++	  // empty
++	}
++      }; // MyReq
++      // This test checks remove_by_req_filter order when second arg is true.
++      using ClientId = int;
++      using Queue = dmc::PullPriorityQueue<ClientId,MyReq>;
++
++      ClientId client1 = 17;
++
++      dmc::ClientInfo info1(0.0, 1.0, 0.0);
++
++      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo {
++	return info1;
++      };
++
++      Queue pq(client_info_f, true);
++
++      EXPECT_EQ(0u, pq.client_count());
++      EXPECT_EQ(0u, pq.request_count());
++
++      ReqParams req_params(1,1);
++
++      pq.add_request(MyReq(1), client1, req_params);
++      pq.add_request(MyReq(2), client1, req_params);
++      pq.add_request(MyReq(3), client1, req_params);
++      pq.add_request(MyReq(4), client1, req_params);
++      pq.add_request(MyReq(5), client1, req_params);
++      pq.add_request(MyReq(6), client1, req_params);
++
++      EXPECT_EQ(1u, pq.client_count());
++      EXPECT_EQ(6u, pq.request_count());
++
++      // remove odd ids, visiting backwards (second arg true); inserting at
++      // the front of capture undoes the reversal, so expect 1, 3, 5
++      std::vector<MyReq> capture;
++      pq.remove_by_req_filter(
++	[&capture] (const MyReq& r) -> bool {
++	  if (1 == r.id % 2) {
++	    capture.insert(capture.begin(), r);
++	    return true;
++	  } else {
++	    return false;
++	  }
++	},
++	true);
++
++      EXPECT_EQ(3u, pq.request_count());
++      EXPECT_EQ(3u, capture.size());
++      EXPECT_EQ(1, capture[0].id) << "items should come out in forward order";
++      EXPECT_EQ(3, capture[1].id) << "items should come out in forward order";
++      EXPECT_EQ(5, capture[2].id) << "items should come out in forward order";
++
++      // remove even ids, again visiting backwards; appending to capture2
++      // keeps the visit order, so expect 6, 4, 2
++      std::vector<MyReq> capture2;
++      pq.remove_by_req_filter(
++	[&capture2] (const MyReq& r) -> bool {
++	  if (0 == r.id % 2) {
++	    capture2.push_back(r);
++	    return true;
++	  } else {
++	    return false;
++	  }
++	},
++	true);
++
++      EXPECT_EQ(0u, pq.request_count());
++      EXPECT_EQ(3u, capture2.size());
++      EXPECT_EQ(6, capture2[0].id) << "items should come out in reverse order";
++      EXPECT_EQ(4, capture2[1].id) << "items should come out in reverse order";
++      EXPECT_EQ(2, capture2[2].id) << "items should come out in reverse order";
++    } // TEST
++
++
++    TEST(dmclock_server, remove_by_client) {
++      struct MyReq {
++	int id;
++
++	MyReq(int _id) :
++	  id(_id)
++	{
++	  // empty
++	}
++      }; // MyReq
++      // This test checks that remove_by_client drops one client's requests.
++      using ClientId = int;
++      using Queue = dmc::PullPriorityQueue<ClientId,MyReq>;
++
++      ClientId client1 = 17;
++      ClientId client2 = 98;
++
++      dmc::ClientInfo info1(0.0, 1.0, 0.0);
++
++      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo {
++	return info1;
++      };
++
++      Queue pq(client_info_f, true);
++
++      EXPECT_EQ(0u, pq.client_count());
++      EXPECT_EQ(0u, pq.request_count());
++
++      ReqParams req_params(1,1);
++
++      pq.add_request(MyReq(1), client1, req_params);
++      pq.add_request(MyReq(11), client1, req_params);
++      pq.add_request(MyReq(2), client2, req_params);
++      pq.add_request(MyReq(0), client2, req_params);
++      pq.add_request(MyReq(13), client2, req_params);
++      pq.add_request(MyReq(2), client2, req_params);
++      pq.add_request(MyReq(13), client2, req_params);
++      pq.add_request(MyReq(98), client2, req_params);
++      pq.add_request(MyReq(44), client1, req_params);
++
++      EXPECT_EQ(2u, pq.client_count());
++      EXPECT_EQ(9u, pq.request_count());
++
++      std::list<MyReq> removed;
++      // second arg true with push_front should leave removed as 1, 11, 44
++      pq.remove_by_client(client1,
++			  true,
++			  [&removed] (const MyReq& r) {
++			    removed.push_front(r);
++			  });
++
++      EXPECT_EQ(3u, removed.size());
++      EXPECT_EQ(1, removed.front().id);
++      removed.pop_front();
++      EXPECT_EQ(11, removed.front().id);
++      removed.pop_front();
++      EXPECT_EQ(44, removed.front().id);
++      removed.pop_front();
++
++      EXPECT_EQ(6u, pq.request_count());
++      // client2's requests should still be pulled in the order they were added
++      Queue::PullReq pr = pq.pull_request();
++      EXPECT_TRUE(pr.is_retn());
++      EXPECT_EQ(2, pr.get_retn().request->id);
++
++      pr = pq.pull_request();
++      EXPECT_TRUE(pr.is_retn());
++      EXPECT_EQ(0, pr.get_retn().request->id);
++
++      pq.remove_by_client(client2);
++      EXPECT_EQ(0u, pq.request_count()) <<
++	"after second client removed, none left";
++    } // TEST
++
++
++    TEST(dmclock_server_pull, pull_weight) {
++      using ClientId = int;
++      using Queue = dmc::PullPriorityQueue<ClientId,Request>;
++      using QueueRef = std::unique_ptr<Queue>;
++      // Checks that priority-phase pulls favor client2 two to one.
++      ClientId client1 = 17;
++      ClientId client2 = 98;
++      // ClientInfo args are presumably (reservation, weight, limit) -- confirm
++      dmc::ClientInfo info1(0.0, 1.0, 0.0);
++      dmc::ClientInfo info2(0.0, 2.0, 0.0);
++
++      QueueRef pq;
++
++      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo {
++	if (client1 == c) return info1;
++	else if (client2 == c) return info2;
++	else {
++	  ADD_FAILURE() << "client info looked up for non-existant client";
++	  return info1;
++	}
++      };
++
++      pq = QueueRef(new Queue(client_info_f, false));
++
++      Request req;
++      ReqParams req_params(1,1);
++
++      auto now = dmc::get_time(); // NOTE(review): only incremented, never read
++
++      for (int i = 0; i < 5; ++i) {
++	pq->add_request(req, client1, req_params);
++	pq->add_request(req, client2, req_params);
++	now += 0.0001;
++      }
++      // pull 6 of the 10 requests and count how many each client supplied
++      int c1_count = 0;
++      int c2_count = 0;
++      for (int i = 0; i < 6; ++i) {
++	Queue::PullReq pr = pq->pull_request();
++	EXPECT_EQ(Queue::NextReqType::returning, pr.type);
++	auto& retn = boost::get<Queue::PullReq::Retn>(pr.data);
++
++	if (client1 == retn.client) ++c1_count;
++	else if (client2 == retn.client) ++c2_count;
++	else ADD_FAILURE() << "got request from neither of two clients";
++
++	EXPECT_EQ(PhaseType::priority, retn.phase);
++      }
++
++      EXPECT_EQ(2, c1_count) <<
++	"one-third of request should have come from first client";
++      EXPECT_EQ(4, c2_count) <<
++	"two-thirds of request should have come from second client";
++    }
++
++
++    TEST(dmclock_server_pull, pull_reservation) {
++      using ClientId = int;
++      using Queue = dmc::PullPriorityQueue<ClientId,Request>;
++      using QueueRef = std::unique_ptr<Queue>;
++      // Checks that reservation-phase pulls favor client1 two to one.
++      ClientId client1 = 52;
++      ClientId client2 = 8;
++      // ClientInfo args are presumably (reservation, weight, limit) -- confirm
++      dmc::ClientInfo info1(2.0, 0.0, 0.0);
++      dmc::ClientInfo info2(1.0, 0.0, 0.0);
++
++      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo {
++	if (client1 == c) return info1;
++	else if (client2 == c) return info2;
++	else {
++	  ADD_FAILURE() << "client info looked up for non-existant client";
++	  return info1;
++	}
++      };
++
++      QueueRef pq(new Queue(client_info_f, false));
++
++      Request req;
++      ReqParams req_params(1,1);
++
++      // stamp every request about 100 before the current time so that all
++      // of them are already in the past when pull_request() is called
++      auto old_time = dmc::get_time() - 100.0;
++
++      for (int i = 0; i < 5; ++i) {
++	pq->add_request_time(req, client1, req_params, old_time);
++	pq->add_request_time(req, client2, req_params, old_time);
++	old_time += 0.001;
++      }
++      // pull 6 of the 10 requests and count how many each client supplied
++      int c1_count = 0;
++      int c2_count = 0;
++
++      for (int i = 0; i < 6; ++i) {
++	Queue::PullReq pr = pq->pull_request();
++	EXPECT_EQ(Queue::NextReqType::returning, pr.type);
++	auto& retn = boost::get<Queue::PullReq::Retn>(pr.data);
++
++	if (client1 == retn.client) ++c1_count;
++	else if (client2 == retn.client) ++c2_count;
++	else ADD_FAILURE() << "got request from neither of two clients";
++
++	EXPECT_EQ(PhaseType::reservation, retn.phase);
++      }
++
++      EXPECT_EQ(4, c1_count) <<
++	"two-thirds of request should have come from first client";
++      EXPECT_EQ(2, c2_count) <<
++	"one-third of request should have come from second client";
++    } // dmclock_server_pull.pull_reservation
++
++
++    // This test shows what happens when a request can be ready (under
++    // limit) but not schedulable since proportion tag is 0. We expect
++    // to get some future and none responses.
++    TEST(dmclock_server_pull, ready_and_under_limit) {
++      using ClientId = int;
++      using Queue = dmc::PullPriorityQueue<ClientId,Request>;
++      using QueueRef = std::unique_ptr<Queue>;
++
++      ClientId client1 = 52;
++      ClientId client2 = 8;
++      // ClientInfo args are presumably (reservation, weight, limit) -- confirm
++      dmc::ClientInfo info1(1.0, 0.0, 0.0);
++      dmc::ClientInfo info2(1.0, 0.0, 0.0);
++
++      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo {
++	if (client1 == c) return info1;
++	else if (client2 == c) return info2;
++	else {
++	  ADD_FAILURE() << "client info looked up for non-existant client";
++	  return info1;
++	}
++      };
++
++      QueueRef pq(new Queue(client_info_f, false));
++
++      Request req;
++      ReqParams req_params(1,1);
++
++      // make sure all times are well before now
++      auto start_time = dmc::get_time() - 100.0;
++      // add six requests, three per client, all stamped with start_time;
++      // for the same client, reservations are spaced one apart
++      for (int i = 0; i < 3; ++i) {
++	pq->add_request_time(req, client1, req_params, start_time);
++	pq->add_request_time(req, client2, req_params, start_time);
++      }
++      // at +0.5 two pulls succeed (one per client, presumably); a third is too soon
++      Queue::PullReq pr = pq->pull_request(start_time + 0.5);
++      EXPECT_EQ(Queue::NextReqType::returning, pr.type);
++
++      pr = pq->pull_request(start_time + 0.5);
++      EXPECT_EQ(Queue::NextReqType::returning, pr.type);
++
++      pr = pq->pull_request(start_time + 0.5);
++      EXPECT_EQ(Queue::NextReqType::future, pr.type) <<
++	"too soon for next reservation";
++
++      pr = pq->pull_request(start_time + 1.5);
++      EXPECT_EQ(Queue::NextReqType::returning, pr.type);
++
++      pr = pq->pull_request(start_time + 1.5);
++      EXPECT_EQ(Queue::NextReqType::returning, pr.type);
++
++      pr = pq->pull_request(start_time + 1.5);
++      EXPECT_EQ(Queue::NextReqType::future, pr.type) <<
++	"too soon for next reservation";
++
++      pr = pq->pull_request(start_time + 2.5);
++      EXPECT_EQ(Queue::NextReqType::returning, pr.type);
++
++      pr = pq->pull_request(start_time + 2.5);
++      EXPECT_EQ(Queue::NextReqType::returning, pr.type);
++      // all six requests have now been pulled
++      pr = pq->pull_request(start_time + 2.5);
++      EXPECT_EQ(Queue::NextReqType::none, pr.type) << "no more requests left";
++    }
++
++
++    TEST(dmclock_server_pull, pull_none) {
++      using ClientId = int;
++      using Queue = dmc::PullPriorityQueue<ClientId,Request>;
++      using QueueRef = std::unique_ptr<Queue>;
++      // Checks that pulling from a queue holding no requests yields none.
++      dmc::ClientInfo info(1.0, 1.0, 1.0);
++
++      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo {
++	return info;
++      };
++
++      QueueRef pq(new Queue(client_info_f, false));
++
++      // Request req;
++      ReqParams req_params(1,1); // NOTE(review): unused in this test
++
++      auto now = dmc::get_time();
++      // nothing was ever added, so even a pull far ahead of now finds nothing
++      Queue::PullReq pr = pq->pull_request(now + 100);
++
++      EXPECT_EQ(Queue::NextReqType::none, pr.type);
++    }
++
++
++    TEST(dmclock_server_pull, pull_future) {
++      using ClientId = int;
++      using Queue = dmc::PullPriorityQueue<ClientId,Request>;
++      using QueueRef = std::unique_ptr<Queue>;
++      // Checks that a request stamped later than the pull time yields future.
++      ClientId client1 = 52;
++      // ClientId client2 = 8;
++
++      dmc::ClientInfo info(1.0, 0.0, 1.0);
++
++      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo {
++	return info;
++      };
++
++      QueueRef pq(new Queue(client_info_f, false));
++
++      Request req;
++      ReqParams req_params(1,1);
++
++      // stamp the request 100 past now, i.e. later than the pull time
++      auto now = dmc::get_time();
++
++      pq->add_request_time(req, client1, req_params, now + 100);
++      Queue::PullReq pr = pq->pull_request(now);
++
++      EXPECT_EQ(Queue::NextReqType::future, pr.type);
++      // for a future result, data is expected to hold the request's time
++      Time when = boost::get<Time>(pr.data);
++      EXPECT_EQ(now + 100, when);
++    }
++
++
++    TEST(dmclock_server_pull, pull_future_limit_break_weight) {
++      using ClientId = int;
++      using Queue = dmc::PullPriorityQueue<ClientId,Request>;
++      using QueueRef = std::unique_ptr<Queue>;
++      // Queue built with true: a future-stamped request is returned anyway.
++      ClientId client1 = 52;
++      // ClientId client2 = 8;
++
++      dmc::ClientInfo info(0.0, 1.0, 1.0);
++
++      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo {
++	return info;
++      };
++      // NOTE(review): true presumably enables "limit break" -- confirm
++      QueueRef pq(new Queue(client_info_f, true));
++
++      Request req;
++      ReqParams req_params(1,1);
++
++      // stamp the request 100 past now, i.e. later than the pull time
++      auto now = dmc::get_time();
++
++      pq->add_request_time(req, client1, req_params, now + 100);
++      Queue::PullReq pr = pq->pull_request(now);
++
++      EXPECT_EQ(Queue::NextReqType::returning, pr.type);
++
++      auto& retn = boost::get<Queue::PullReq::Retn>(pr.data);
++      EXPECT_EQ(client1, retn.client);
++    }
++
++
++    TEST(dmclock_server_pull, pull_future_limit_break_reservation) {
++      using ClientId = int;
++      using Queue = dmc::PullPriorityQueue<ClientId,Request>;
++      using QueueRef = std::unique_ptr<Queue>;
++      // Queue built with true: a future-stamped request is returned anyway.
++      ClientId client1 = 52;
++      // ClientId client2 = 8;
++
++      dmc::ClientInfo info(1.0, 0.0, 1.0);
++
++      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo {
++	return info;
++      };
++      // NOTE(review): true presumably enables "limit break" -- confirm
++      QueueRef pq(new Queue(client_info_f, true));
++
++      Request req;
++      ReqParams req_params(1,1);
++
++      // stamp the request 100 past now, i.e. later than the pull time
++      auto now = dmc::get_time();
++
++      pq->add_request_time(req, client1, req_params, now + 100);
++      Queue::PullReq pr = pq->pull_request(now);
++
++      EXPECT_EQ(Queue::NextReqType::returning, pr.type);
++
++      auto& retn = boost::get<Queue::PullReq::Retn>(pr.data);
++      EXPECT_EQ(client1, retn.client);
++    }
++  } // namespace dmclock
++} // namespace crimson
diff --cc src/dmclock/test/test_test_client.cc
index 00000000000,00000000000..6015cb9bf7b
new file mode 100644
--- /dev/null
+++ b/src/dmclock/test/test_test_client.cc
@@@ -1,0 -1,0 +1,123 @@@
++// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
++// vim: ts=8 sw=2 smarttab
++/*
++ * Copyright (C) 2016 Red Hat Inc.
++ */
++
++#include <atomic>
++#include <thread>
++#include <chrono>
++#include <iostream>
++
++#include "gtest/gtest.h"
++
++#include "sim_recs.h"
++#include "sim_client.h"
++
++#include "test_dmclock.h"
++
++
++using namespace std::placeholders;
++
++namespace dmc = crimson::dmclock;
++namespace test = crimson::test_dmc;
++namespace sim = crimson::qos_simulation;
++
++using TimePoint = std::chrono::time_point<std::chrono::system_clock>;
++
++static TimePoint now() { return TimePoint::clock::now(); } // current system_clock time
++
++
++TEST(test_client, full_bore_timing) {
++  // Runs 1000 ops at a 100 iops goal with every request answered at once,
++  // so the whole run is expected to take between 10 and 12 seconds.
++  std::atomic_ulong count(0);
++  ServerId server_id = 3;
++  sim::TestResponse resp(0);
++  dmc::PhaseType resp_params = dmc::PhaseType::priority;
++  test::DmcClient* client = nullptr; // assigned below; callbacks capture it by ref
++
++  auto start = now();
++  client =
++    new test::DmcClient(ClientId(0),
++			[&] (const ServerId& server,
++			     const sim::TestRequest& req,
++			     const ClientId& client_id,
++			     const dmc::ReqParams& req_params) {
++			  ++count;
++			  client->receive_response(resp, client_id, resp_params);
++			},
++			[&] (const uint64_t seed) -> ServerId& {
++			  return server_id;
++			},
++			test::dmc_client_accumulate_f,
++			1000, // ops to run
++			100, // iops goal
++			5); // outstanding ops allowed
++  client->wait_until_done();
++  auto end = now();
++  EXPECT_EQ(1000u, count) << "didn't get right number of ops";
++
++  int milliseconds = (end - start) / std::chrono::milliseconds(1);
++  EXPECT_LT(10000, milliseconds) << "timing too fast to be correct";
++  EXPECT_GT(12000, milliseconds) << "timing suspiciously slow";
++  delete client; // run is over; release the client allocated with new above
++}
++
++
++TEST(test_client, paused_timing) {
++  // Leaves the first 50 requests unanswered for 5 seconds, then answers them
++  // and responds to every later request at once; checks the total run time.
++  std::atomic_ulong count(0);
++  std::atomic_ulong unresponded_count(0);
++  std::atomic_bool auto_respond(false);
++  ClientId my_client_id = 0;
++  ServerId server_id = 3;
++  sim::TestResponse resp(0);
++  dmc::PhaseType resp_params = dmc::PhaseType::priority;
++  test::DmcClient* client = nullptr; // assigned below; callbacks capture it by ref
++
++  auto start = now();
++  client =
++    new test::DmcClient(my_client_id,
++			[&] (const ServerId& server,
++			     const sim::TestRequest& req,
++			     const ClientId& client_id,
++			     const dmc::ReqParams& req_params) {
++			  ++count;
++			  if (auto_respond.load()) {
++			    client->receive_response(resp, client_id, resp_params);
++			  } else {
++			    ++unresponded_count;
++			  }
++			},
++			[&] (const uint64_t seed) -> ServerId& {
++			  return server_id;
++			},
++			test::dmc_client_accumulate_f,
++			1000, // ops to run
++			100, // iops goal
++			50); // outstanding ops allowed
++  std::thread t([&]() {
++      std::this_thread::sleep_for(std::chrono::seconds(5));
++      EXPECT_EQ(50u, unresponded_count.load()) <<
++	"should have 50 unresponded calls";
++      auto_respond = true;
++      // respond to those 50 calls
++      for(int i = 0; i < 50; ++i) {
++	client->receive_response(resp, my_client_id, resp_params);
++	--unresponded_count;
++      }
++    });
++
++  client->wait_until_done();
++  auto end = now();
++  int milliseconds = (end - start) / std::chrono::milliseconds(1);
++
++  // the 50 outstanding ops allowed means the first half-second of
++  // requests get responded to during the 5 second pause. So we have
++  // to adjust our expectations by a half-second.
++  EXPECT_LT(15000 - 500, milliseconds) << "timing too fast to be correct";
++  EXPECT_GT(17000 - 500, milliseconds) << "timing suspiciously slow";
++  t.join();
++  delete client; // only after t.join(): the helper thread calls through client
++}