From 0bca9fe991c7e1f623d2a387f54e63f18d3227eb Mon Sep 17 00:00:00 2001
From: "J. Eric Ivancich" <ivancich@redhat.com>
Date: Fri, 28 Apr 2017 17:13:42 -0400
Subject: [PATCH] Squashed 'src/dmclock/' content from commit d6586d7

git-subtree-dir: src/dmclock
git-subtree-split: d6586d73679f4a1bdf335235d309e2352f0c76c6
---
 .gitignore                                   |    4 +
 CMakeLists.txt                               |   32 +
 README.md                                    |   45 +
 benchmark/README.md                          |   42 +
 benchmark/configs/dmc_sim_100_100.conf       |   31 +
 benchmark/configs/dmc_sim_8_6.conf           |   43 +
 benchmark/data_gen.sh                        |   73 +
 benchmark/data_parser.py                     |  191 +++
 benchmark/plot_gen.sh                        |   60 +
 benchmark/run.sh                             |   24 +
 cmake/modules/Findboost.cmake                |   15 +
 cmake/modules/Findgtest.cmake                |   48 +
 dmclock-config.cmake.in                      |   17 +
 dmclock-targets.cmake                        |    1 +
 sim/CMakeLists.txt                           |    1 +
 sim/dmc_sim_100th.conf                       |   32 +
 sim/dmc_sim_example.conf                     |   43 +
 sim/src/CMakeLists.txt                       |   42 +
 sim/src/ConfUtils.cc                         |  574 +++++++
 sim/src/ConfUtils.h                          |   83 +
 sim/src/config.cc                            |  171 ++
 sim/src/config.h                             |  138 ++
 sim/src/sim_client.h                         |  329 ++++
 sim/src/sim_recs.h                           |  121 ++
 sim/src/sim_server.h                         |  225 +++
 sim/src/simulate.h                           |  430 +++++
 sim/src/ssched/ssched_client.h               |   44 +
 sim/src/ssched/ssched_recs.h                 |   37 +
 sim/src/ssched/ssched_server.h               |  182 ++
 sim/src/str_list.cc                          |  106 ++
 sim/src/str_list.h                           |   94 ++
 sim/src/test_dmclock.cc                      |   40 +
 sim/src/test_dmclock.h                       |   56 +
 sim/src/test_dmclock_main.cc                 |  322 ++++
 sim/src/test_ssched.cc                       |   33 +
 sim/src/test_ssched.h                        |   57 +
 sim/src/test_ssched_main.cc                  |  187 +++
 src/CMakeLists.txt                           |   19 +
 src/dmclock_client.h                         |  194 +++
 src/dmclock_recs.h                           |   61 +
 src/dmclock_server.h                         | 1588 ++++++++++++++++++
 src/dmclock_util.cc                          |   27 +
 src/dmclock_util.h                           |   45 +
 support/CMakeLists.txt                       |    1 +
 support/src/debug.h                          |   17 +
 support/src/heap.h                           |  240 +++
 support/src/indirect_intrusive_heap.h        |  549 ++++++
 support/src/intrusive_heap.h                 |  214 +++
 support/src/profile.h                        |  114 ++
 support/src/run_every.cc                     |   73 +
 support/src/run_every.h                      |   68 +
 support/test/CMakeLists.txt                  |   29 +
 support/test/test_ind_intru_heap.cc          |   82 +
 support/test/test_indirect_intrusive_heap.cc |  930 ++++++++++
 support/test/test_intrusive_heap.cc          |   86 +
 test/CMakeLists.txt                          |   35 +
 test/test_dmclock_client.cc                  |  219 +++
 test/test_dmclock_server.cc                  |  826 +++++++++
 test/test_test_client.cc                     |  123 ++
 59 files changed, 9513 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 CMakeLists.txt
 create mode 100644 README.md
 create mode 100644 benchmark/README.md
 create mode 100644 benchmark/configs/dmc_sim_100_100.conf
 create mode 100644 benchmark/configs/dmc_sim_8_6.conf
 create mode 100755 benchmark/data_gen.sh
 create mode 100755 benchmark/data_parser.py
 create mode 100755 benchmark/plot_gen.sh
 create mode 100755 benchmark/run.sh
 create mode 100644 cmake/modules/Findboost.cmake
 create mode 100644 cmake/modules/Findgtest.cmake
 create mode 100644 dmclock-config.cmake.in
 create mode 100644 dmclock-targets.cmake
 create mode 100644 sim/CMakeLists.txt
 create mode 100644 sim/dmc_sim_100th.conf
 create mode 100644 sim/dmc_sim_example.conf
 create mode 100644 sim/src/CMakeLists.txt
 create mode 100644 sim/src/ConfUtils.cc
 create mode 100644 sim/src/ConfUtils.h
 create mode 100644 sim/src/config.cc
 create mode 100644 sim/src/config.h
 create mode 100644 sim/src/sim_client.h
 create mode 100644 sim/src/sim_recs.h
 create mode 100644 sim/src/sim_server.h
 create mode 100644 sim/src/simulate.h
 create mode 100644 sim/src/ssched/ssched_client.h
 create mode 100644 sim/src/ssched/ssched_recs.h
 create mode 100644 sim/src/ssched/ssched_server.h
 create mode 100644 sim/src/str_list.cc
 create mode 100644 sim/src/str_list.h
 create mode 100644 sim/src/test_dmclock.cc
 create mode 100644 sim/src/test_dmclock.h
 create mode 100644 sim/src/test_dmclock_main.cc
 create mode 100644 sim/src/test_ssched.cc
 create mode 100644 sim/src/test_ssched.h
 create mode 100644 sim/src/test_ssched_main.cc
 create mode 100644 src/CMakeLists.txt
 create mode 100644 src/dmclock_client.h
 create mode 100644 src/dmclock_recs.h
 create mode 100644 src/dmclock_server.h
 create mode 100644 src/dmclock_util.cc
 create mode 100644 src/dmclock_util.h
 create mode 100644 support/CMakeLists.txt
 create mode 100644 support/src/debug.h
 create mode 100644 support/src/heap.h
 create mode 100644 support/src/indirect_intrusive_heap.h
 create mode 100644 support/src/intrusive_heap.h
 create mode 100644 support/src/profile.h
 create mode 100644 support/src/run_every.cc
 create mode 100644 support/src/run_every.h
 create mode 100644 support/test/CMakeLists.txt
 create mode 100644 support/test/test_ind_intru_heap.cc
 create mode 100644 support/test/test_indirect_intrusive_heap.cc
 create mode 100644 support/test/test_intrusive_heap.cc
 create mode 100644 test/CMakeLists.txt
 create mode 100644 test/test_dmclock_client.cc
 create mode 100644 test/test_dmclock_server.cc
 create mode 100644 test/test_test_client.cc

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000000..c6ddef2752b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+*~
+*.dSYM
+*.o
+build*
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 00000000000..428863dc496
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,32 @@
+cmake_minimum_required(VERSION 2.8.11)
+
+set(CMAKE_CXX_FLAGS "-std=c++11 -Wno-write-strings ${CMAKE_CXX_FLAGS}")
+# CMAKE_CURRENT_SOURCE_DIR keeps this path valid when the tree is pulled in via add_subdirectory().
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/")
+
+if(DO_NOT_DELAY_TAG_CALC)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDO_NOT_DELAY_TAG_CALC")
+endif()
+
+if(K_WAY_HEAP)
+  if(K_WAY_HEAP LESS 2)
+    message(FATAL_ERROR "K_WAY_HEAP value should be at least 2")
+  else()
+    set(CMAKE_CXX_SIM_FLAGS "-DK_WAY_HEAP=${K_WAY_HEAP}")
+  endif()
+endif()
+
+if (NOT(TARGET gtest AND TARGET gtest_main))
+  find_package(gtest REQUIRED)
+  include_directories(${GTEST_INCLUDE_DIRS})
+endif()
+
+find_package(Boost REQUIRED)
+include_directories(${Boost_INCLUDE_DIRS})
+
+add_subdirectory(src)
+add_subdirectory(sim)
+add_subdirectory(support)
+
+enable_testing()
+add_subdirectory(test)
diff --git a/README.md b/README.md
new file mode 100644
index 00000000000..ab67295b153
--- /dev/null
+++ b/README.md
@@ -0,0 +1,45 @@
+# dmclock
+
+This repository contains C++ 11 code that implements the dmclock
+distributed quality of service algorithm. See __mClock: Handling
+Throughput Variability for Hypervisor IO Scheduling__ by Gulati,
+Merchant, and Varman for a description of the algorithm.
+
+## Running cmake
+
+When running cmake, set the build type with either:
+
+    -DCMAKE_BUILD_TYPE=Debug
+    -DCMAKE_BUILD_TYPE=Release
+
+To turn on profiling, run cmake with an additional:
+
+    -DPROFILE=yes
+
+An optimization/fix to the published algorithm has been added and is
+on by default. To disable this optimization/fix run cmake with:
+
+    -DDO_NOT_DELAY_TAG_CALC=yes
+
+## Running make
+
+### Building the dmclock library
+
+The `make` command builds a library libdmclock.a. That plus the header
+files in the src directory allow one to use the implementation in
+their code.
+
+### Building unit tests
+
+The `make dmclock-tests` command builds unit tests.
+
+### Building simulations
+
+The `make dmclock-sims` command builds two simulations -- *dmc_sim*
+and *ssched_sim* -- which incorporate, respectively, the dmclock
+priority queue or a very simple scheduler for comparison. Other
+priority queue implementations could be added in the future.
+
+## dmclock API
+
+To be written....
diff --git a/benchmark/README.md b/benchmark/README.md
new file mode 100644
index 00000000000..d945e986fc1
--- /dev/null
+++ b/benchmark/README.md
@@ -0,0 +1,42 @@
+# dmclock benchmarking
+
+**IMPORTANT**: now that K_WAY_HEAP is no longer allowed to have the
+value 1, the shell and Python scripts that generate the PDFs no longer
+work exactly correctly. Some effort to debug is necessary.
+
+This directory contains scripts to evaluate the effects of different
+branching factors (k=2 to k=11) in the IndirectIntrusiveHeap
+data structure. IndirectIntrusiveHeap is now a k-way heap, so finding
+an ideal value for k (e.g., k=2 or k=3) for a particular workload is
+important. Also, it is well-documented that the right choice of
+k-value improves the caching behaviour [Syed -- citation needed
+here]. As a result, the overall performance of an application using
+a k-way heap increases significantly [Syed -- citation needed here].
+
+A rule of thumb is the following:
+	if the number of elements is <= 6, use k=1 (no longer accepted by the build; the minimum is now k=2)
+	otherwise, use k=3.
+
+## Prerequisites
+
+requires python 2.7, gnuplot, and awk.
+  
+## Running benchmark
+
+./run.sh [name_of_the_output] [k_way] [repeat] # [Syed -- last two command line args do not work]
+
+The "run.sh" script looks for config files in the "configs" directory,
+and the final output is generated as
+"name_of_the_output.pdf". Internally, "run.sh" calls other scripts
+such as data_gen.sh, data_parser.py, and plot_gen.sh.
+
+## Modifying parameters
+
+To modify the k-value and/or the number of times each simulation is
+repeated, modify the following two variables in the "run.sh" file:
+
+    k_way=[your_value]
+    repeat=[your_value]
+
+For example, k_way=3 means the benchmark will compare simulations
+using 2-way and 3-way heaps.
diff --git a/benchmark/configs/dmc_sim_100_100.conf b/benchmark/configs/dmc_sim_100_100.conf
new file mode 100644
index 00000000000..c93d4c71f6d
--- /dev/null
+++ b/benchmark/configs/dmc_sim_100_100.conf
@@ -0,0 +1,31 @@
+[global]
+server_groups = 1
+client_groups = 2
+server_random_selection = true
+server_soft_limit = true
+
+[server.0]
+server_count = 100
+server_iops  = 160
+
+[client.0]
+client_count = 99
+client_wait = 0
+client_total_ops = 10000
+client_server_select_range = 100
+client_iops_goal = 200
+client_outstanding_ops = 32
+client_reservation = 100.0
+client_limit = 0.0
+client_weight = 1.0
+
+[client.1]
+client_count = 1
+client_wait = 10
+client_total_ops = 10000
+client_server_select_range = 100
+client_iops_goal = 200
+client_outstanding_ops = 32
+client_reservation = 100.0
+client_limit = 0.0
+client_weight = 1.0
diff --git a/benchmark/configs/dmc_sim_8_6.conf b/benchmark/configs/dmc_sim_8_6.conf
new file mode 100644
index 00000000000..28aeb401d44
--- /dev/null
+++ b/benchmark/configs/dmc_sim_8_6.conf
@@ -0,0 +1,43 @@
+[global]
+server_groups = 1
+client_groups = 3
+server_random_selection = true
+server_soft_limit = true
+
+[client.0]
+client_count = 2
+client_wait = 0
+client_total_ops = 1000
+client_server_select_range = 8
+client_iops_goal = 200
+client_outstanding_ops = 32
+client_reservation = 0.0
+client_limit = 0.0
+client_weight = 1.0
+
+[client.1]
+client_count = 2
+client_wait = 5
+client_total_ops = 1000
+client_server_select_range = 8
+client_iops_goal = 200
+client_outstanding_ops = 32
+client_reservation = 20.0
+client_limit = 40.0
+client_weight = 1.0
+
+[client.2]
+client_count = 2
+client_wait = 10
+client_total_ops = 1000
+client_server_select_range = 8
+client_iops_goal = 200
+client_outstanding_ops = 32
+client_reservation = 0.0
+client_limit = 50.0
+client_weight = 2.0
+
+
+[server.0]
+server_count = 8
+server_iops  = 160
diff --git a/benchmark/data_gen.sh b/benchmark/data_gen.sh
new file mode 100755
index 00000000000..80a77bd9a1a
--- /dev/null
+++ b/benchmark/data_gen.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+config_dir="configs"
+repeat=2 #5
+
+# parameter check -- output_file name (required)
+if [ -n "$1" ]; then
+  output_file="$1"
+else
+  echo "Please provide the name of the output file"
+  exit 1  # a missing argument is a failure, not a success
+fi
+
+# parameter check -- maximum k-value (required)
+if [ -n "$2" ]; then
+  k_way="$2"
+else
+  echo "Please provide the maximum K_WAY value"
+  exit 1
+fi
+
+# parameter check --repeat
+if [ "$3" != "" ]; then
+  repeat="$3"
+fi
+
+echo "k-way:$k_way, num_repeat:$repeat"
+
+# create simulators in different directories (k starts at 2: K_WAY_HEAP < 2 is rejected)
+k=2
+while [ "$k" -le "$k_way" ]
+do
+  mkdir -p "build_$k"
+  cd "build_$k" || exit 1  # never run the rm below from the wrong directory
+  rm -rf ./*
+  cmake -DCMAKE_BUILD_TYPE=Release -DK_WAY_HEAP="$k" ../../.
+  make dmclock-sims
+  cd ..
+
+  k=$(( k + 1 ))
+done
+
+# run simulators (results are appended to the output file in this directory)
+echo '' > "$output_file"
+for config in "$config_dir"/*.conf
+do
+  k=2
+  while [ "$k" -le "$k_way" ]
+  do
+    cd "build_$k" || exit 1  # otherwise the "cd .." below would leave this directory
+
+    # repeat same experiment
+    i=0
+    while [ "$i" -lt "$repeat" ]
+    do
+      i=$(( i + 1 ))
+
+      # clear cache first
+      sync
+      #sudo sh -c 'echo 1 >/proc/sys/vm/drop_caches'
+      #sudo sh -c 'echo 2 >/proc/sys/vm/drop_caches'
+      #sudo sh -c 'echo 3 >/proc/sys/vm/drop_caches'
+
+      # run with heap
+      msg="file_name:$k:$config"
+      echo "$msg" >> "../$output_file"
+      echo "running $msg ..."
+      ./sim/dmc_sim -c "../$config" | awk '(/average/)' >> "../$output_file"
+    done # end repeat
+    cd ..
+    k=$(( k + 1 ))
+  done # end k_way
+done # end config
+
diff --git a/benchmark/data_parser.py b/benchmark/data_parser.py
new file mode 100755
index 00000000000..c90d85fd9ab
--- /dev/null
+++ b/benchmark/data_parser.py
@@ -0,0 +1,191 @@
+#!/usr/bin/env python
+
+class DataPoint:  
+  def __init__(self):                
+    self.nserver = 0;
+    self.nclient = 0;
+    self.heap_type = 0;  
+    self.total_time_to_add_req = 0;
+    self.total_time_to_complete_req = 0;
+    self.config = ''
+
+  def set_name(self, config, heap_type):
+    self.config = config;
+    self.heap_type = heap_type
+
+  def get_conig(self):
+    import re
+    return re.split(r"/|\.", self.config)[1]
+
+  def __str__(self):
+    return "s:%d, c:%d,h:%d,config:%s"%(self.nserver, self.nclient, self.heap_type, self.config);
+# end DataPoint
+
+
+def isFloat(elem):        
+ try:
+  float(elem)
+  return True
+ except ValueError:
+  return False
+#end isFloat
+
+
+def parse_config_params(fname):
+  nclient = 0;
+  nserver = 0;
+  # read config file property 
+  with open(fname, 'r') as f:
+    for line in f:
+      line = line.strip('\n \t')
+      if not line: continue;
+      if line.startswith("client_count"):
+        nclient += int(line.split('=')[-1]);
+      if line.startswith("server_count"): 
+        nserver += int(line.split('=')[-1]);
+  # end of file
+  return [nserver, nclient];
+# parse_config_params
+
+def make_aggregate_data_point(dps, config, heap_type):
+    # Average the timings of every run in dps that used this config file
+    # and this k-way heap; returns a DataPoint holding the averages.
+    dp = DataPoint()
+    dp.set_name(config, heap_type)
+
+    num_run = 0
+    for _dp in dps:
+      if _dp.config == config and _dp.heap_type == heap_type:
+        dp.nserver = _dp.nserver
+        dp.nclient = _dp.nclient
+        num_run += 1
+        dp.total_time_to_add_req += _dp.total_time_to_add_req
+        dp.total_time_to_complete_req += _dp.total_time_to_complete_req
+
+    # No run matched this (config, heap_type) pair: leave the totals at
+    # zero rather than raising ZeroDivisionError.
+    if num_run:
+      dp.total_time_to_add_req /= num_run
+      dp.total_time_to_complete_req /= num_run
+    return dp
+
+def parse_data_points(filename):
+  dps = []; #data-points
+  dp = None;
+  state = 0;
+  configs = {}
+  k_ways  = {}
+  
+  with open(filename, 'r') as f:
+    for line in f:
+      line = line.strip('\n \t')
+      if not line: continue;
+      
+      # file_name:1:configs/dmc_sim_8_6.conf
+      if line.startswith("file_name"):      
+        if dp:
+          dps.append(dp);
+          state = 0;
+         
+        # new data-point 
+        dp = DataPoint();
+        parts = line.split(':')
+        fname = parts[-1];        
+        dp.heap_type = int(parts[1]);
+        if dp.heap_type not in k_ways:
+          k_ways[dp.heap_type] = 1;
+        
+        # add to the dictionary
+        configs[fname] = 1;
+        
+        dp.config = fname;
+        params = parse_config_params(fname)      
+        dp.nserver = params[0];
+        dp.nclient = params[-1];
+         
+      elif line.startswith("average"):	# take last 2 averages
+        r = [float(s) for s in line.split(' ') if isFloat(s)]
+        state +=1;
+        #print r, dp #if isFloat(s)
+        if state == 3:
+          dp.total_time_to_add_req = r[0]
+        elif state == 4:
+          dp.total_time_to_complete_req = r[0]
+        else: pass
+
+      else: 
+        pass;    
+  # final entry
+  dps.append(dp) 
+  
+  # compute average of multiple runs
+  dps_avg = []
+  for config in configs:
+    data_per_config = []
+    for k in k_ways:
+      aggr_dp = make_aggregate_data_point(dps, config , k);
+      data_per_config.append(aggr_dp);
+    dps_avg.append(data_per_config);
+  # end for
+  return dps_avg;
+# end parse_data_points
+
+
+def create_header(num_cols):
+  fields = ['nserver_nclient(config_file)','add_req', 'complete_req'];
+  header = fields[0]
+  #write add_req_{1, ...}
+  for i in range(num_cols):
+    header = '%s %s_%i'%(header, fields[1], i+2)
+  #write complete_req_{1, ...}
+  for i in range(num_cols):
+    header = '%s %s_%i'%(header, fields[2], i+2)
+  # new-line
+  header = '%s\n'%(header)
+  return header
+# end create_header
+
+
+def create_data_line(aggr_dp):
+  # get common info
+  dp = aggr_dp[0]
+  data_line = "s:%d_c:%d "%(dp.nserver, dp.nclient);
+  # get the point-count
+  num_cols = len(aggr_dp);
+  # write add_req_{1, ...}
+  for i in range(num_cols):
+    data_line = '%s %f'%(data_line, aggr_dp[i].total_time_to_add_req)
+  # write complete_req_{1, ...}
+  for i in range(num_cols):
+    data_line = '%s %f'%(data_line, aggr_dp[i].total_time_to_complete_req)
+  # new-line
+  data_line = '%s\n'%(data_line)
+  return data_line
+# end create_data_line
+
+    
+def make_data(filename):
+  # write the aggregated point in space separated file  
+  dps = parse_data_points(filename);
+  if not len(dps) : return
+  print "total points: ", len(dps)
+  # open file
+  with open('%s.dat'%(filename), 'w+') as f:
+    # write header
+    f.write(create_header(len(dps[0])));
+    # write data-line
+    for aggr_dp in dps:
+    	f.write(create_data_line(aggr_dp));
+
+
+def main(output_file):
+  print output_file
+  make_data(output_file);
+
+import sys
+if __name__ == "__main__":
+  file_name="result"
+  if len(sys.argv) > 1:
+    file_name=sys.argv[1].strip()
+  main(file_name)
+
diff --git a/benchmark/plot_gen.sh b/benchmark/plot_gen.sh
new file mode 100755
index 00000000000..d90bde1921a
--- /dev/null
+++ b/benchmark/plot_gen.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+# parameter check -- output file name (required)
+if [ -n "$1" ]; then
+  output_file="$1"
+else
+  echo "Please provide the name of the output file"
+  exit 1  # a missing argument is a failure, not a success
+fi
+
+# parameter check -- maximum k-value (required)
+if [ -n "$2" ]; then
+  k_way="$2"
+else
+  echo "Please provide the maximum K_WAY value"
+  exit 1
+fi
+#echo "k-way: $k_way"
+#exit
+
+gnuplot << EOF
+
+# Note you need gnuplot 4.4 for the pdfcairo terminal.
+clear
+reset
+
+set terminal pdfcairo size 7in,5in font "Gill Sans,5" linewidth 1 rounded fontscale .8 noenhanced
+set output "${output_file}.pdf"
+
+# starts multiplot
+set multiplot layout 2,1
+
+# Line style for axes
+set style line 80 lt rgb "#808080"
+
+# Line style for grid
+set style line 81 lt 0  # dashed
+set style line 81 lt rgb "#808080"  # grey
+
+set grid back linestyle 81
+set border 3 back linestyle 80 
+
+#set xtics rotate out
+set style data histogram
+set style histogram clustered
+
+set style fill solid border
+set xlabel 'Heap Timing for different K values'   
+set ylabel 'Time (nanosec)'        
+set key top right
+
+set yrange [0:*]
+
+# plot 1 -- the .dat file has one add_req column per k in 2..k_way (columns 2..k_way)
+set title 'Request Addition Time'
+plot for [COL=2:$k_way] '${output_file}.dat' using COL:xticlabels(1) title columnheader
+
+# plot 2 -- followed by one complete_req column per k (columns k_way+1..2*k_way-1)
+set title 'Request Completion Time'
+plot for [COL=($k_way + 1):(2 * $k_way - 1)] '${output_file}.dat' using COL:xticlabels(1) title columnheader
+EOF
diff --git a/benchmark/run.sh b/benchmark/run.sh
new file mode 100755
index 00000000000..11432b53008
--- /dev/null
+++ b/benchmark/run.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# default values
+k_way=3 #11
+repeat=2 #5
+
+# the output file name is the only (required) argument
+if [ -n "$1" ]; then
+  output_file="$1"
+else
+  echo "Please provide the name of the output file"
+  exit 1
+fi
+
+echo "generating file ${output_file}"
+sh data_gen.sh "${output_file}" "${k_way}" "${repeat}" || exit 1
+
+echo "converting ${output_file} to ${output_file}.dat"
+python data_parser.py "${output_file}" || exit 1
+
+echo "now generating bar-chart"
+#gnuplot -e 'output_file=value'  plot_gen.gnuplot 
+sh plot_gen.sh "${output_file}" "${k_way}" || exit 1
+echo "done! check ${output_file}.pdf"
diff --git a/cmake/modules/Findboost.cmake b/cmake/modules/Findboost.cmake
new file mode 100644
index 00000000000..4f0dfd052f0
--- /dev/null
+++ b/cmake/modules/Findboost.cmake
@@ -0,0 +1,15 @@
+# - Find boost
+
+find_path(BOOST_INCLUDE_DIR NAMES boost/variant.hpp
+    PATHS /usr/include /usr/local/include ${BOOST_DIR}/include)
+
+include(FindPackageHandleStandardArgs)
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(boost
+  REQUIRED_VARS BOOST_INCLUDE_DIR)
+
+if(boost_FOUND)
+  set(BOOST_FOUND 1)
+endif()
+if(BOOST_FOUND)
+  set(BOOST_INCLUDES ${BOOST_INCLUDE_DIR})
+endif()
diff --git a/cmake/modules/Findgtest.cmake b/cmake/modules/Findgtest.cmake
new file mode 100644
index 00000000000..bfe0980e4ed
--- /dev/null
+++ b/cmake/modules/Findgtest.cmake
@@ -0,0 +1,48 @@
+# - Find gtest
+#
+#  GTEST_INCLUDE_DIRS   - where to find gtest/gtest.h, etc.
+#  GTEST_LIBRARY, GTEST_MAIN_LIBRARY - the gtest and gtest_main libraries.
+#  GTEST_FOUND          - True if gtest found.
+#
+#  GMOCK_INCLUDE_DIRS   - where to find gmock/gmock.h, etc.
+#  GMOCK_LIBRARY, GMOCK_MAIN_LIBRARY - the gmock and gmock_main libraries.
+#  GMOCK_FOUND          - True if gmock found.
+
+
+## GTEST
+
+find_path(GTEST_INCLUDE_DIRS NAMES gtest/gtest.h
+    PATHS /usr/include /usr/local/include)
+
+find_library(GTEST_LIBRARY gtest
+  PATHS /usr/local/lib /usr/lib64)
+
+find_library(GTEST_MAIN_LIBRARY gtest_main
+  PATHS /usr/local/lib /usr/lib64)
+
+include(FindPackageHandleStandardArgs)
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(gtest
+  REQUIRED_VARS GTEST_LIBRARY GTEST_MAIN_LIBRARY GTEST_INCLUDE_DIRS)
+
+if(gtest_FOUND)
+  set(GTEST_FOUND 1)
+endif()
+
+## GMOCK
+
+find_path(GMOCK_INCLUDE_DIRS NAMES gmock/gmock.h
+    PATHS /usr/include /usr/local/include)
+
+find_library(GMOCK_LIBRARY gmock
+  PATHS /usr/local/lib /usr/lib64)
+
+find_library(GMOCK_MAIN_LIBRARY gmock_main
+  PATHS /usr/local/lib /usr/lib64)
+
+include(FindPackageHandleStandardArgs)
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(gmock
+  REQUIRED_VARS GMOCK_LIBRARY GMOCK_MAIN_LIBRARY GMOCK_INCLUDE_DIRS)
+
+if(gmock_FOUND)
+  set(GMOCK_FOUND 1)
+endif()
diff --git a/dmclock-config.cmake.in b/dmclock-config.cmake.in
new file mode 100644
index 00000000000..01636532c1d
--- /dev/null
+++ b/dmclock-config.cmake.in
@@ -0,0 +1,17 @@
+# - Config file for the dmclock package
+# It defines the following variables
+#  DMCLOCK_INCLUDE_DIRS - include directories for dmclock
+#  DMCLOCK_LIBRARIES    - libraries to link against
+ 
+# Compute paths
+get_filename_component(DMCLOCK_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
+set(DMCLOCK_INCLUDE_DIRS "${DMCLOCK_CMAKE_DIR}/src")
+# set(DMCLOCK_INCLUDE_DIRS "@CONF_INCLUDE_DIRS@")
+ 
+# Our library dependencies (contains definitions for IMPORTED targets)
+if(NOT TARGET dmclock AND NOT dmclock_BINARY_DIR)
+  include("${DMCLOCK_CMAKE_DIR}/dmclock-targets.cmake")
+endif()
+ 
+# The dmclock library target to link against
+set(DMCLOCK_LIBRARIES dmclock)
diff --git a/dmclock-targets.cmake b/dmclock-targets.cmake
new file mode 100644
index 00000000000..2c84f34a142
--- /dev/null
+++ b/dmclock-targets.cmake
@@ -0,0 +1 @@
+export(PACKAGE dmclock)
diff --git a/sim/CMakeLists.txt b/sim/CMakeLists.txt
new file mode 100644
index 00000000000..febd4f0ab6f
--- /dev/null
+++ b/sim/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(src)
diff --git a/sim/dmc_sim_100th.conf b/sim/dmc_sim_100th.conf
new file mode 100644
index 00000000000..17d0043548e
--- /dev/null
+++ b/sim/dmc_sim_100th.conf
@@ -0,0 +1,32 @@
+[global]
+server_groups = 1
+client_groups = 2
+server_random_selection = true
+server_soft_limit = true
+
+[client.0]
+client_count = 99
+client_wait = 0
+client_total_ops = 1000
+client_server_select_range = 10
+client_iops_goal = 50
+client_outstanding_ops = 100
+client_reservation = 20.0
+client_limit = 60.0
+client_weight = 1.0
+
+[client.1]
+client_count = 1
+client_wait = 10
+client_total_ops = 1000
+client_server_select_range = 10
+client_iops_goal = 50
+client_outstanding_ops = 100
+client_reservation = 20.0
+client_limit = 60.0
+client_weight = 1.0
+
+[server.0]
+server_count = 100
+server_iops = 40
+server_threads = 1
diff --git a/sim/dmc_sim_example.conf b/sim/dmc_sim_example.conf
new file mode 100644
index 00000000000..989f2f08281
--- /dev/null
+++ b/sim/dmc_sim_example.conf
@@ -0,0 +1,43 @@
+[global]
+server_groups = 1
+client_groups = 3
+server_random_selection = false
+server_soft_limit = false
+
+[client.0]
+client_count = 1
+client_wait = 0
+client_total_ops = 2000
+client_server_select_range = 1
+client_iops_goal = 200
+client_outstanding_ops = 32
+client_reservation = 0.0
+client_limit = 0.0
+client_weight = 1.0
+
+[client.1]
+client_count = 1
+client_wait = 5
+client_total_ops = 2000
+client_server_select_range = 1
+client_iops_goal = 200
+client_outstanding_ops = 32
+client_reservation = 0.0
+client_limit = 40.0
+client_weight = 1.0
+
+[client.2]
+client_count = 1
+client_wait = 10
+client_total_ops = 2000
+client_server_select_range = 1
+client_iops_goal = 200
+client_outstanding_ops = 32
+client_reservation = 0.0
+client_limit = 50.0
+client_weight = 2.0
+
+[server.0]
+server_count = 1
+server_iops = 160
+server_threads = 1
diff --git a/sim/src/CMakeLists.txt b/sim/src/CMakeLists.txt
new file mode 100644
index 00000000000..426827b03f2
--- /dev/null
+++ b/sim/src/CMakeLists.txt
@@ -0,0 +1,42 @@
+include_directories(ssched) # ssched code
+include_directories(../../src) # dmclock code
+include_directories(../../support/src)
+include_directories(${BOOST_INCLUDE_DIR})
+
+set(local_flags "-Wall -pthread ${CMAKE_CXX_SIM_FLAGS}")
+
+set(ssched_sim_srcs test_ssched.cc test_ssched_main.cc)
+set(dmc_sim_srcs test_dmclock.cc test_dmclock_main.cc)
+set(config_srcs config.cc str_list.cc ConfUtils.cc)
+
+set_source_files_properties(${ssched_sim_srcs} ${dmc_sim_srcs} ${dmc_srcs} ${config_srcs}
+  PROPERTIES
+  COMPILE_FLAGS "${local_flags}"
+  )
+
+if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
+  set(warnings_off " -Wno-unused-variable -Wno-unused-function")
+elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
+  set(warnings_off " -Wno-unused-but-set-variable -Wno-unused-function")
+endif()
+
+# append warning flags to certain source files
+set_property(
+  SOURCE ${ssched_sim_srcs} ${dmc_sim_srcs} ${config_srcs}
+  APPEND_STRING
+  PROPERTY COMPILE_FLAGS "${warnings_off}"
+  )
+
+add_executable(ssched_sim EXCLUDE_FROM_ALL ${ssched_sim_srcs})
+add_executable(dmc_sim EXCLUDE_FROM_ALL ${dmc_sim_srcs} ${config_srcs})
+
+set_target_properties(ssched_sim dmc_sim
+  PROPERTIES
+  RUNTIME_OUTPUT_DIRECTORY ..)
+
+add_dependencies(dmc_sim dmclock)
+
+target_link_libraries(ssched_sim LINK_PRIVATE pthread)
+target_link_libraries(dmc_sim LINK_PRIVATE pthread $<TARGET_FILE:dmclock>)
+
+add_custom_target(dmclock-sims DEPENDS ssched_sim dmc_sim)
diff --git a/sim/src/ConfUtils.cc b/sim/src/ConfUtils.cc
new file mode 100644
index 00000000000..74ddb06ee29
--- /dev/null
+++ b/sim/src/ConfUtils.cc
@@ -0,0 +1,574 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2011 New Dream Network
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include <algorithm>
+#include <errno.h>
+#include <list>
+#include <map>
+#include <sstream>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <string>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <iostream>
+
+#include <assert.h>
+#include "ConfUtils.h"
+
+using std::cerr;
+using std::ostringstream;
+using std::pair;
+using std::string;
+
+#define MAX_CONFIG_FILE_SZ 0x40000000
+
+////////////////////////////// ConfLine //////////////////////////////
+// Records one parsed line; only the key, value and section name are kept.
+ConfLine::ConfLine(const std::string &key_, const std::string val_,
+		   const std::string newsection_,
+		   const std::string comment_, int line_no_)
+  : key(key_),
+    val(val_),
+    newsection(newsection_)
+{ /* comment_ and line_no_ would need saving here for writable ConfFile support */ }
+
+// Ordering used by the std::set that holds the lines of a section.
+//
+// We only compare keys, so two lines with the same key count as the same
+// element no matter what their values are.  If you have more than one line
+// with the same key in a given section, the last one wins (the parser
+// replaces the earlier entry).
+bool ConfLine::
+operator<(const ConfLine &rhs) const
+{
+  return key < rhs.key;
+}
+
+// Debug-prints a line as ConfLine(key = '...', val='...', newsection='...').
+std::ostream &operator<<(std::ostream& oss, const ConfLine &l)
+{
+  oss << "ConfLine(key = '" << l.key << "', val='" << l.val;
+  return oss << "', newsection='" << l.newsection << "')";
+}
+///////////////////////// ConfFile //////////////////////////
+// Creates an empty configuration with no sections.
+ConfFile::ConfFile()
+{
+}
+
+// Nothing to release explicitly; the section map cleans up after itself.
+ConfFile::~ConfFile()
+{
+}
+
+// Drops every section (and with it every key/value line) parsed so far.
+void ConfFile::clear()
+{
+  sections.clear();
+}
+
+/* Load and parse the configuration file `fname`, replacing whatever this
+ * object held before.  The whole file is read into memory and handed to
+ * load_from_buffer(); files larger than MAX_CONFIG_FILE_SZ are rejected.
+ *
+ * errors receives messages for I/O failures and for the syntax problems found
+ * by load_from_buffer() and must not be NULL; warnings may be NULL.
+ *
+ * Returns 0 once the buffer has been handed to the parser, -errno if
+ * fopen/fstat/fread fail, -EINVAL if the file is too large, -ENOMEM if the
+ * buffer cannot be allocated and -EIO on a short read.
+ */
+int ConfFile::
+parse_file(const std::string &fname, std::deque<std::string> *errors,
+	   std::ostream *warnings)
+{
+  clear();
+
+  int ret = 0;
+  size_t sz;
+  char *buf = NULL;
+  char buf2[128];
+  FILE *fp = fopen(fname.c_str(), "r");
+  if (!fp)
+    return -errno;	// nothing to clean up yet; no message is recorded
+
+  struct stat st_buf;
+  if (fstat(fileno(fp), &st_buf)) {
+    ret = -errno;
+    ostringstream oss;
+    // ret is the negated errno, strerror_r() wants the positive code
+    oss << "read_conf: failed to fstat '" << fname << "': "
+	<< strerror_r(-ret, buf2, sizeof(buf2));
+    errors->push_back(oss.str());
+    goto done;
+  }
+
+  if (st_buf.st_size > MAX_CONFIG_FILE_SZ) {
+    ostringstream oss;
+    oss << "read_conf: config file '" << fname << "' is " << st_buf.st_size
+	<< " bytes, but the maximum is " << MAX_CONFIG_FILE_SZ;
+    errors->push_back(oss.str());
+    ret = -EINVAL;
+    goto done;
+  }
+
+  sz = (size_t)st_buf.st_size;
+  // malloc(0) may legally return NULL, which must not look like ENOMEM
+  buf = (char*)malloc(sz ? sz : 1);
+  if (!buf) {
+    ret = -ENOMEM;
+    goto done;
+  }
+
+  if (fread(buf, 1, sz, fp) != sz) {
+    ostringstream oss;
+    if (ferror(fp)) {
+      ret = -errno;
+      oss << "read_conf: fread error while reading '" << fname << "': "
+	  << strerror_r(-ret, buf2, sizeof(buf2));
+    } else {
+      ret = -EIO;
+      oss << "read_conf: unexpected EOF while reading '" << fname << "': "
+	  << "possible concurrent modification?";
+    }
+    errors->push_back(oss.str());
+    goto done;
+  }
+
+  load_from_buffer(buf, sz, errors, warnings);	// ret is still 0 here
+
+done:
+  free(buf);
+  fclose(fp);
+  return ret;
+}
+
+// Fetch the value of (normalized) `key` in `section`; 0 if found, else -ENOENT.
+int ConfFile::
+read(const std::string &section, const std::string &key, std::string &val) const
+{
+  const const_section_iter_t sec = sections.find(section);
+  if (sec == sections.end())
+    return -ENOENT;
+  // lines are ordered by key alone, so a value-less probe is enough
+  const ConfLine probe(normalize_key_name(key), "", "", "", 0);
+  const ConfSection::const_line_iter_t hit = sec->second.lines.find(probe);
+  if (hit == sec->second.lines.end())
+    return -ENOENT;
+  val = hit->val;
+  return 0;
+}
+
+// First section, in the iteration order of the section map (sorted by name).
+ConfFile::const_section_iter_t ConfFile::sections_begin() const
+{
+  return sections.begin();
+}
+
+// Past-the-end position matching sections_begin().
+ConfFile::const_section_iter_t ConfFile::sections_end() const
+{
+  return sections.end();
+}
+
+/* Strip leading and trailing whitespace from `str` in place.
+ *
+ * str            - string to trim; modified in place.
+ * strip_internal - when true, every run of whitespace inside the string is
+ *                  also collapsed to its first character, so "a \t b"
+ *                  becomes "a b" and "a\t b" becomes "a\tb".
+ *
+ * The string is treated like a C string: anything from the first embedded
+ * NUL onwards is dropped.
+ */
+void ConfFile::
+trim_whitespace(std::string &str, bool strip_internal)
+{
+  // isspace() is undefined for negative values, which plain char can hold,
+  // so classify through unsigned char
+  const auto is_ws = [](char c) {
+    return isspace(static_cast<unsigned char>(c)) != 0;
+  };
+
+  // C-string semantics: ignore everything after an embedded NUL
+  const std::string::size_type nul = str.find('\0');
+  if (nul != std::string::npos)
+    str.erase(nul);
+
+  // [first, last) is the part that survives the outer trim
+  std::string::size_type first = 0;
+  while (first < str.size() && is_ws(str[first]))
+    ++first;
+  std::string::size_type last = str.size();
+  while (last > first && is_ws(str[last - 1]))
+    --last;
+
+  if (!strip_internal) {
+    // outer trim only: internal whitespace is kept exactly as it is
+    str = str.substr(first, last - first);
+    return;
+  }
+
+  // collapse each internal whitespace run to its first character
+  std::string collapsed;
+  collapsed.reserve(last - first);
+  bool prev_was_space = false;
+  for (std::string::size_type i = first; i < last; ++i) {
+    const char c = str[i];
+    const bool space = is_ws(c);
+    if (!space || !prev_was_space)
+      collapsed += c;
+    prev_was_space = space;
+  }
+  str.swap(collapsed);
+}
+
+/* Normalize a key name.  Normalized key names have no leading or trailing
+ * whitespace, and each internal run of whitespace (space, tab, ...) is stored
+ * as one underscore.  This form was chosen so that stringified md_config_t
+ * field names (see common/config.cc) are already keys in normal form.
+ */
+std::string ConfFile::
+normalize_key_name(const std::string &key)
+{
+  string k(key);
+  ConfFile::trim_whitespace(k, true);
+  // trim_whitespace keeps the first character of each run, which may be a tab
+  std::replace_if(k.begin(), k.end(),
+		  [](char c) { return isspace((unsigned char)c) != 0; }, '_');
+  return k;
+}
+
+// Dumps every section as "[name]" followed by one tab-indented
+// key = "value" line per entry; entries with an empty key are skipped.
+std::ostream &operator<<(std::ostream &oss, const ConfFile &cf)
+{
+  for (auto sec = cf.sections_begin(); sec != cf.sections_end(); ++sec) {
+    oss << "[" << sec->first << "]\n";
+    for (const ConfLine &line : sec->second.lines) {
+      if (line.key.empty())
+	continue;
+      oss << "\t" << line.key << " = \"" << line.val << "\"\n";
+    }
+  }
+  return oss;
+}
+
+void ConfFile::
+load_from_buffer(const char *buf, size_t sz, std::deque<std::string> *errors,
+		 std::ostream *warnings)
+{
+  // Parses the sz bytes at buf as newline-terminated configuration lines.
+  //
+  // Key/value lines go into the section named by the most recent "[name]"
+  // line, or into "global" before the first one.  *errors is emptied and
+  // then receives a message for each problem found; *warnings, when not
+  // NULL, is told about keys that are defined twice in one section.
+  //
+  // A final line without a trailing newline is reported and ignored, as is
+  // a file whose last line ends in a continuation backslash.
+  errors->clear();
+
+  section_iter_t::value_type global_entry("global", ConfSection());
+  pair < section_iter_t, bool > inserted(sections.insert(global_entry));
+  assert(inserted.second);
+  section_iter_t cur_section = inserted.first;
+
+  std::string acc;		// logical line, joined across continuations
+  const char *next = buf;	// first byte that has not been consumed yet
+  size_t rem = sz;		// number of bytes left starting at next
+  int line_no = 0;
+
+  while (rem != 0) {
+    line_no++;
+
+    // look for the next newline
+    const char *end = (const char*)memchr(next, '\n', rem);
+    if (!end) {
+      ostringstream oss;
+      oss << "read_conf: ignoring line " << line_no << " because it doesn't "
+	  << "end with a newline! Please end the config file with a newline.";
+      errors->push_back(oss.str());
+      break;
+    }
+
+    // Consume the line and its newline right away, so that every
+    // `continue` below resumes with the following line.
+    const char *b = next;
+    const size_t line_len = end - b;
+    next = end + 1;
+    rem -= line_len + 1;
+
+    // a NUL inside the line would cut the C string handed to process_line
+    if (memchr(b, '\0', line_len)) {
+      ostringstream oss;
+      oss << "read_conf: ignoring line " << line_no << " because it has "
+	  << "an embedded null.";
+      errors->push_back(oss.str());
+      acc.clear();
+      continue;
+    }
+
+    if ((line_len >= 1) && (b[line_len-1] == '\\')) {
+      // A backslash at the end of a line serves as a line continuation marker.
+      // Combine the next line with this one.
+      // Remove the backslash itself from the text.
+      acc.append(b, line_len - 1);
+      continue;
+    }
+
+    acc.append(b, line_len);
+
+    ConfLine *cline = process_line(line_no, acc.c_str(), errors);
+    acc.clear();
+    if (!cline)
+      continue;
+
+    if (!cline->newsection.empty()) {
+      // "[name]" line: make that section current, creating it if necessary
+      section_iter_t::value_type entry(cline->newsection, ConfSection());
+      cur_section = sections.insert(entry).first;
+    }
+    else {
+      std::set <ConfLine> &lines = cur_section->second.lines;
+      // replace an existing key/line in this section, so that
+      //  [mysection]
+      //    foo = 1
+      //    foo = 2
+      // will result in foo = 2.
+      const bool redefined = lines.erase(*cline) != 0;
+      if (redefined && cline->key.length() && warnings)
+	*warnings << "warning: line " << line_no << ": '" << cline->key << "' in section '"
+		  << cur_section->first << "' redefined " << std::endl;
+      // add line to current section
+      lines.insert(*cline);
+    }
+    delete cline;
+  }
+
+  if (!acc.empty()) {
+    ostringstream oss;
+    oss << "read_conf: don't end with lines that end in backslashes!";
+    errors->push_back(oss.str());
+  }
+}
+
+/* State-machine parser for one logical line (newline already removed).
+ * Returns a new ConfLine that the caller must delete, or NULL for a blank line
+ * or a syntax error (described in *errors).  A comment-only line yields a
+ * ConfLine whose key, val and newsection are all empty.
+ */
+ConfLine* ConfFile::
+process_line(int line_no, const char *line, std::deque<std::string> *errors)
+{
+  enum acceptor_state_t {
+    ACCEPT_INIT,		// before the first significant character
+    ACCEPT_SECTION_NAME,	// inside "[...]"
+    ACCEPT_KEY,			// reading the key, up to '='
+    ACCEPT_VAL_START,		// after '=', before the value starts
+    ACCEPT_UNQUOTED_VAL,
+    ACCEPT_QUOTED_VAL,
+    ACCEPT_COMMENT_START,	// only whitespace or a comment may follow
+    ACCEPT_COMMENT_TEXT,
+  };
+  const char *l = line;
+  acceptor_state_t state = ACCEPT_INIT;
+  string key, val, newsection, comment;
+  bool escaping = false;	// previous character was an unescaped backslash
+  while (true) {
+    char c = *l++;
+    switch (state) {
+      case ACCEPT_INIT:
+	if (c == '\0')
+	  return NULL; // blank line. Not an error, but not interesting either.
+	else if (c == '[')
+	  state = ACCEPT_SECTION_NAME;
+	else if ((c == '#') || (c == ';'))
+	  state = ACCEPT_COMMENT_TEXT;
+	else if (c == ']') {
+	  ostringstream oss;
+	  oss << "unexpected right bracket at char " << (l - line)
+	      << ", line " << line_no;
+	  errors->push_back(oss.str());
+	  return NULL;
+	}
+	else if (isspace((unsigned char)c)) { // cast: isspace is undefined for negative char
+	  // ignore whitespace here
+	}
+	else {
+	  // try to accept this character as a key
+	  state = ACCEPT_KEY;
+	  --l;
+	}
+	break;
+      case ACCEPT_SECTION_NAME:
+	if (c == '\0') {
+	  ostringstream oss;
+	  oss << "error parsing new section name: expected right bracket "
+	      << "at char " << (l - line) << ", line " << line_no;
+	  errors->push_back(oss.str());
+	  return NULL;
+	}
+	else if ((c == ']') && (!escaping)) {
+	  trim_whitespace(newsection, true);
+	  if (newsection.empty()) {
+	    ostringstream oss;
+	    oss << "error parsing new section name: no section name found? "
+	        << "at char " << (l - line) << ", line " << line_no;
+	    errors->push_back(oss.str());
+	    return NULL;
+	  }
+	  state = ACCEPT_COMMENT_START;
+	}
+	else if (((c == '#') || (c == ';')) && (!escaping)) {
+	  ostringstream oss;
+	  oss << "unexpected comment marker while parsing new section name, at "
+	      << "char " << (l - line) << ", line " << line_no;
+	  errors->push_back(oss.str());
+	  return NULL;
+	}
+	else if ((c == '\\') && (!escaping)) {
+	  escaping = true;
+	}
+	else {
+	  escaping = false;
+	  newsection += c;
+	}
+	break;
+      case ACCEPT_KEY:
+	if ((((c == '#') || (c == ';')) && (!escaping)) || (c == '\0')) {
+	  ostringstream oss;
+	  if (c == '\0') {
+	    oss << "end of key=val line " << line_no
+	        << " reached, no \"=val\" found...missing =?";
+	  } else {
+	    oss << "unexpected character while parsing putative key value, "
+		<< "at char " << (l - line) << ", line " << line_no;
+	  }
+	  errors->push_back(oss.str());
+	  return NULL;
+	}
+	else if ((c == '=') && (!escaping)) {
+	  key = normalize_key_name(key);
+	  if (key.empty()) {
+	    ostringstream oss;
+	    oss << "error parsing key name: no key name found? "
+	        << "at char " << (l - line) << ", line " << line_no;
+	    errors->push_back(oss.str());
+	    return NULL;
+	  }
+	  state = ACCEPT_VAL_START;
+	}
+	else if ((c == '\\') && (!escaping)) {
+	  escaping = true;
+	}
+	else {
+	  escaping = false;
+	  key += c;
+	}
+	break;
+      case ACCEPT_VAL_START:
+	if (c == '\0')
+	  return new ConfLine(key, val, newsection, comment, line_no);
+	else if ((c == '#') || (c == ';'))
+	  state = ACCEPT_COMMENT_TEXT;
+	else if (c == '"')
+	  state = ACCEPT_QUOTED_VAL;
+	else if (isspace((unsigned char)c)) {
+	  // ignore whitespace
+	}
+	else {
+	  // try to accept character as a val
+	  state = ACCEPT_UNQUOTED_VAL;
+	  --l;
+	}
+	break;
+      case ACCEPT_UNQUOTED_VAL:
+	if (c == '\0') {
+	  if (escaping) {
+	    ostringstream oss;
+	    oss << "error parsing value name: unterminated escape sequence "
+	        << "at char " << (l - line) << ", line " << line_no;
+	    errors->push_back(oss.str());
+	    return NULL;
+	  }
+	  trim_whitespace(val, false);
+	  return new ConfLine(key, val, newsection, comment, line_no);
+	}
+	else if (((c == '#') || (c == ';')) && (!escaping)) {
+	  trim_whitespace(val, false);
+	  state = ACCEPT_COMMENT_TEXT;
+	}
+	else if ((c == '\\') && (!escaping)) {
+	  escaping = true;
+	}
+	else {
+	  escaping = false;
+	  val += c;
+	}
+	break;
+      case ACCEPT_QUOTED_VAL:
+	if (c == '\0') {
+	  ostringstream oss;
+	  oss << "found opening quote for value, but not the closing quote. "
+	      << "line " << line_no;
+	  errors->push_back(oss.str());
+	  return NULL;
+	}
+	else if ((c == '"') && (!escaping)) {
+	  state = ACCEPT_COMMENT_START;
+	}
+	else if ((c == '\\') && (!escaping)) {
+	  escaping = true;
+	}
+	else {
+	  escaping = false;
+	  // Add anything, including whitespace.
+	  val += c;
+	}
+	break;
+      case ACCEPT_COMMENT_START:
+	if (c == '\0') {
+	  return new ConfLine(key, val, newsection, comment, line_no);
+	}
+	else if ((c == '#') || (c == ';')) {
+	  state = ACCEPT_COMMENT_TEXT;
+	}
+	else if (isspace((unsigned char)c)) {
+	  // ignore whitespace
+	}
+	else {
+	  ostringstream oss;
+	  oss << "unexpected character at char " << (l - line) << " of line "
+	      << line_no;
+	  errors->push_back(oss.str());
+	  return NULL;
+	}
+	break;
+      case ACCEPT_COMMENT_TEXT:
+	if (c == '\0')
+	  return new ConfLine(key, val, newsection, comment, line_no);
+	else
+	  comment += c;
+	break;
+      default:
+	assert(0);
+	break;
+    }
+    assert(c != '\0'); // We better not go past the end of the input string.
+  }
+}
diff --git a/sim/src/ConfUtils.h b/sim/src/ConfUtils.h
new file mode 100644
index 00000000000..6c9c2c6c9c8
--- /dev/null
+++ b/sim/src/ConfUtils.h
@@ -0,0 +1,83 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2011 New Dream Network
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#ifndef CEPH_CONFUTILS_H
+#define CEPH_CONFUTILS_H
+
+#include <deque>
+#include <map>
+#include <set>
+#include <string>
+
+/*
+ * Ceph configuration file support.
+ *
+ * This class loads an INI-style configuration from a file or bufferlist, and
+ * holds it in memory. In general, an INI configuration file is composed of
+ * sections, which contain key/value pairs. You can put comments on the end of
+ * lines by using either a hash mark (#) or the semicolon (;).
+ *
+ * You can get information out of ConfFile by calling read() or by iterating
+ * over the sections with sections_begin() and sections_end().
+ *
+ * This class could be extended to support modifying configuration files and
+ * writing them back out without too much difficulty. Currently, this is not
+ * implemented, and the file is read-only.
+ */
+class ConfLine { // one parsed line of a configuration file
+public:
+  ConfLine(const std::string &key_, const std::string val_,
+	   const std::string newsection_, const std::string comment_, int line_no_);
+  bool operator<(const ConfLine &rhs) const; // compares keys only
+  friend std::ostream &operator<<(std::ostream& oss, const ConfLine &l);
+  // newsection is non-empty only for a "[section]" header line
+  std::string key, val, newsection;
+};
+
+class ConfSection {
+public:
+  std::set <ConfLine> lines; // ordered, and kept unique, by key only
+
+  using const_line_iter_t = std::set <ConfLine>::const_iterator;
+};
+
+class ConfFile {
+public:
+  using section_iter_t = std::map <std::string, ConfSection>::iterator;
+  using const_section_iter_t = std::map <std::string, ConfSection>::const_iterator;
+
+  ConfFile();
+  ~ConfFile();
+  // loading and lookup
+  void clear();
+  int parse_file(const std::string &fname, std::deque<std::string> *errors,
+		 std::ostream *warnings);
+  int read(const std::string &section, const std::string &key,
+	   std::string &val) const;
+  // iteration over the sections
+  const_section_iter_t sections_begin() const;
+  const_section_iter_t sections_end() const;
+  static void trim_whitespace(std::string &str, bool strip_internal);
+  static std::string normalize_key_name(const std::string &key);
+  friend std::ostream &operator<<(std::ostream &oss, const ConfFile &cf);
+
+private:
+  void load_from_buffer(const char *buf, size_t sz,
+			std::deque<std::string> *errors, std::ostream *warnings);
+  static ConfLine* process_line(int line_no, const char *line,
+				std::deque<std::string> *errors);
+  std::map <std::string, ConfSection> sections; // section name -> its lines
+};
+
+#endif
diff --git a/sim/src/config.cc b/sim/src/config.cc
new file mode 100644
index 00000000000..a6702897cd6
--- /dev/null
+++ b/sim/src/config.cc
@@ -0,0 +1,171 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+
+#include <unistd.h>
+#include <string.h>
+#include <stdarg.h>
+
+#include <iostream>
+#include <vector>
+#include <list>
+
+#include "config.h"
+#include "str_list.h"
+
+
+// Copies `input` to `output`, replacing '-' with '_' in the option name.
+//
+// The first two characters are copied as-is, and so is everything from the
+// first later '=' up to the end of the string; only the dashes in between
+// are rewritten.  The result always has the same length as the input, so
+// `output` needs room for strlen(input) + 1 bytes.
+static void dashes_to_underscores(const char *input, char *output) {
+  size_t pos = 0;
+
+  // first two characters are copied as-is (stop early on a short string)
+  for (; pos < 2; ++pos) {
+    output[pos] = input[pos];
+    if (input[pos] == '\0')
+      return;
+  }
+
+  // option name: everything up to an '=' or the end of the string
+  for (; input[pos] != '\0' && input[pos] != '='; ++pos)
+    output[pos] = (input[pos] == '-') ? '_' : input[pos];
+
+  // whatever is left ("=value", or just the terminator) is copied verbatim
+  strcpy(output + pos, input + pos);
+}
+
+// Matches *i against the NULL-terminated option names in `ap` ('-' == '_').
+// On "--opt=val" / "--opt val" stores val in *ret, erases what was consumed
+// and returns 1; returns 0 on no match, -EINVAL if the value is missing.
+static int va_ceph_argparse_witharg(std::vector<const char*> &args,
+	std::vector<const char*>::iterator &i, std::string *ret,
+	std::ostream &oss, va_list ap) {
+  // std::vector rather than a variable-length array, which is not standard C++
+  std::vector<char> arg(strlen(*i) + 1);
+  dashes_to_underscores(*i, arg.data());
+  const char *first = arg.data();
+  // does this argument match any of the possibilities?
+  while (const char *a = va_arg(ap, char*)) {
+    const size_t strlen_a = strlen(a);
+    std::vector<char> a2(strlen_a + 1);
+    dashes_to_underscores(a, a2.data());
+    if (strncmp(a2.data(), first, strlen_a) != 0)
+      continue;
+    if (first[strlen_a] == '=') {
+      *ret = first + strlen_a + 1;
+      i = args.erase(i);
+      return 1;
+    }
+    if (first[strlen_a] == '\0') {
+      // find second part (or not)
+      if (i+1 == args.end()) {
+	oss << "Option " << *i << " requires an argument." << std::endl;
+	i = args.erase(i);
+	return -EINVAL;
+      }
+      i = args.erase(i);
+      *ret = *i;
+      i = args.erase(i);
+      return 1;
+    }
+  }
+  return 0;
+}
+
+// Variadic front end: the trailing arguments are option names ended by a NULL
+// char pointer.  True if an option was consumed; exits(1) on a missing value.
+bool crimson::qos_simulation::ceph_argparse_witharg(std::vector<const char*> &args,
+	std::vector<const char*>::iterator &i, std::string *ret, ...) {
+  va_list names;
+  va_start(names, ret);
+  const int rc = va_ceph_argparse_witharg(args, i, ret, std::cerr, names);
+  va_end(names);
+  if (rc < 0) _exit(1);
+  return rc != 0;
+}
+
+// Scans `args` for "--conf <file>" / "-c <file>" (also "--conf=<file>"),
+// removes every occurrence from `args` and stores the last value seen in
+// *conf_file_list.  All other arguments are left in place, in order.
+void crimson::qos_simulation::ceph_argparse_early_args(std::vector<const char*>& args, std::string *conf_file_list) {
+  std::string val;
+
+  std::vector<const char*>::iterator i = args.begin();
+  while (i != args.end()) {
+    if (ceph_argparse_witharg(args, i, &val, "--conf", "-c", (char*)NULL)) {
+      // consumed: i already points at the next argument
+      *conf_file_list = val;
+    } else {
+      ++i; // not ours, skip it
+    }
+  }
+}
+
+// True for "true" (any letter case) or text atoi() reads as a non-zero number.
+static bool stobool(const std::string & v) {
+  if (v.empty()) return false;
+  return strcasecmp(v.c_str(), "true") == 0 || atoi(v.c_str()) != 0;
+}
+
+// Loads the simulation settings from the INI-style file `fname` into g_conf:
+// scalars from [global], then one group per [server.N] / [client.N] section
+// (N from 0); absent keys keep the struct defaults.  Returns 0, or the error
+// from ConfFile::parse_file().  Parser complaints are printed to std::cerr;
+// malformed numbers make std::stoul/std::stod throw.
+int crimson::qos_simulation::parse_config_file(const std::string &fname, sim_config_t &g_conf) {
+  ConfFile cf;
+  std::deque<std::string> err;
+  std::ostringstream warn;
+  const int ret = cf.parse_file(fname.c_str(), &err, &warn);
+  // bad lines are reported only through err/warn, so surface them here
+  for (const std::string &msg : err)
+    std::cerr << fname << ": " << msg << std::endl;
+  std::cerr << warn.str();
+  if (ret)
+    return ret;
+
+  std::string val;
+  // true (with the text left in val) when `key` is present in section `sec`
+  const auto has = [&cf, &val](const std::string &sec, const char *key) {
+    return cf.read(sec, key, val) == 0;
+  };
+  if (has("global", "server_groups")) g_conf.server_groups = std::stoul(val);
+  if (has("global", "client_groups")) g_conf.client_groups = std::stoul(val);
+  if (has("global", "server_random_selection"))
+    g_conf.server_random_selection = stobool(val);
+  if (has("global", "server_soft_limit"))
+    g_conf.server_soft_limit = stobool(val);
+
+  for (uint i = 0; i < g_conf.server_groups; i++) {
+    srv_group_t st;
+    const std::string section = "server." + std::to_string(i);
+    if (has(section, "server_count")) st.server_count = std::stoul(val);
+    if (has(section, "server_iops")) st.server_iops = std::stoul(val);
+    if (has(section, "server_threads")) st.server_threads = std::stoul(val);
+    g_conf.srv_group.push_back(st);
+  }
+
+  for (uint i = 0; i < g_conf.client_groups; i++) {
+    cli_group_t ct;
+    const std::string section = "client." + std::to_string(i);
+    if (has(section, "client_count")) ct.client_count = std::stoul(val);
+    if (has(section, "client_wait"))
+      ct.client_wait = std::chrono::seconds(std::stoul(val));
+    if (has(section, "client_total_ops")) ct.client_total_ops = std::stoul(val);
+    if (has(section, "client_server_select_range"))
+      ct.client_server_select_range = std::stoul(val);
+    if (has(section, "client_iops_goal")) ct.client_iops_goal = std::stoul(val);
+    if (has(section, "client_outstanding_ops"))
+      ct.client_outstanding_ops = std::stoul(val);
+    if (has(section, "client_reservation")) ct.client_reservation = std::stod(val);
+    if (has(section, "client_limit")) ct.client_limit = std::stod(val);
+    if (has(section, "client_weight")) ct.client_weight = std::stod(val);
+    g_conf.cli_group.push_back(ct);
+  }
+
+  return 0;
+}
diff --git a/sim/src/config.h b/sim/src/config.h
new file mode 100644
index 00000000000..010f33a743e
--- /dev/null
+++ b/sim/src/config.h
@@ -0,0 +1,138 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+
+#pragma once
+
+
+#include <string.h>
+
+#include <chrono>
+#include <vector>
+#include <sstream>
+#include <iomanip>
+
+#include "ConfUtils.h"
+
+
+namespace crimson {
+  namespace qos_simulation {
+
+    struct cli_group_t { // settings of one [client.N] configuration group
+      uint client_count;
+      std::chrono::seconds client_wait;
+      uint client_total_ops;
+      uint client_server_select_range;
+      uint client_iops_goal;
+      uint client_outstanding_ops;
+      double client_reservation;
+      double client_limit;
+      double client_weight;
+
+      cli_group_t(uint _client_count = 100, uint _client_wait = 0,
+		  uint _client_total_ops = 1000,
+		  uint _client_server_select_range = 10,
+		  uint _client_iops_goal = 50, uint _client_outstanding_ops = 100,
+		  double _client_reservation = 20.0, double _client_limit = 60.0,
+		  double _client_weight = 1.0) :
+	client_count(_client_count),
+	client_wait(std::chrono::seconds(_client_wait)),
+	client_total_ops(_client_total_ops),
+	client_server_select_range(_client_server_select_range),
+	client_iops_goal(_client_iops_goal),
+	client_outstanding_ops(_client_outstanding_ops),
+	client_reservation(_client_reservation),
+	client_limit(_client_limit),
+	client_weight(_client_weight)
+      {}
+
+      friend std::ostream& operator<<(std::ostream& out,
+	  const cli_group_t& cli_group) {
+	// std::fixed/std::setprecision are sticky: save the caller's settings
+	const std::ios_base::fmtflags saved_flags = out.flags();
+	const std::streamsize saved_precision = out.precision();
+	out <<
+	  "client_count = " << cli_group.client_count << "\n" <<
+	  "client_wait = " << cli_group.client_wait.count() << "\n" <<
+	  "client_total_ops = " << cli_group.client_total_ops << "\n" <<
+	  "client_server_select_range = " << cli_group.client_server_select_range << "\n" <<
+	  "client_iops_goal = " << cli_group.client_iops_goal << "\n" <<
+	  "client_outstanding_ops = " << cli_group.client_outstanding_ops << "\n" <<
+	  std::fixed << std::setprecision(1) <<
+	  "client_reservation = " << cli_group.client_reservation << "\n" <<
+	  "client_limit = " << cli_group.client_limit << "\n" <<
+	  "client_weight = " << cli_group.client_weight;
+	out.flags(saved_flags);
+	out.precision(saved_precision);
+	return out;
+      }
+    }; // class cli_group_t
+
+
+    // One [server.N] group of the simulation configuration.
+    struct srv_group_t {
+      uint server_count;
+      uint server_iops;
+      uint server_threads;
+
+      srv_group_t(uint _server_count = 100,
+		  uint _server_iops = 40,
+		  uint _server_threads = 1)
+	: server_count(_server_count),
+	  server_iops(_server_iops),
+	  server_threads(_server_threads)
+      {}
+
+      // Prints the three settings as "name = value" lines; no newline
+      // after the last one.
+      friend std::ostream& operator<<(std::ostream& out,
+	  const srv_group_t& srv_group) {
+	out << "server_count = " << srv_group.server_count << "\n";
+	out << "server_iops = " << srv_group.server_iops << "\n";
+	out << "server_threads = " << srv_group.server_threads;
+	return out;
+      }
+    }; // struct srv_group_t
+
+
+    // Top-level simulation settings plus the per-group settings.
+    struct sim_config_t {
+      uint server_groups;
+      uint client_groups;
+      bool server_random_selection;
+      bool server_soft_limit;
+      std::vector<cli_group_t> cli_group;
+      std::vector<srv_group_t> srv_group;
+
+      sim_config_t(uint _server_groups = 1,
+		   uint _client_groups = 1,
+		   bool _server_random_selection = false,
+		   bool _server_soft_limit = true)
+	: server_groups(_server_groups),
+	  client_groups(_client_groups),
+	  server_random_selection(_server_random_selection),
+	  server_soft_limit(_server_soft_limit)
+      {
+	srv_group.reserve(server_groups);
+	cli_group.reserve(client_groups);
+      }
+
+      // Prints the four scalar settings only (not the groups).
+      friend std::ostream& operator<<(std::ostream& out,
+	  const sim_config_t& sim_config) {
+	out << "server_groups = " << sim_config.server_groups << "\n";
+	out << "client_groups = " << sim_config.client_groups << "\n";
+	out << "server_random_selection = " << sim_config.server_random_selection << "\n";
+	out << "server_soft_limit = " << sim_config.server_soft_limit;
+	return out;
+      }
+    }; // struct sim_config_t
+
+
+    bool ceph_argparse_witharg(std::vector<const char*> &args, // trailing args: option names, ended by a NULL char*
+	std::vector<const char*>::iterator &i, std::string *ret, ...);
+    void ceph_argparse_early_args(std::vector<const char*>& args, std::string *conf_file_list); // extracts --conf / -c
+    int parse_config_file(const std::string &fname, sim_config_t &g_conf); // 0 on success
+
+  }; // namespace qos_simulation
+}; // namespace crimson
diff --git a/sim/src/sim_client.h b/sim/src/sim_client.h
new file mode 100644
index 00000000000..6538dab2c08
--- /dev/null
+++ b/sim/src/sim_client.h
@@ -0,0 +1,329 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#pragma once
+
+
+#include <atomic>
+#include <mutex>
+#include <condition_variable>
+#include <thread>
+#include <chrono>
+#include <vector>
+#include <deque>
+#include <iostream>
+
+#include "sim_recs.h"
+
+
+namespace crimson {
+  namespace qos_simulation {
+
+    struct req_op_t {}; // tag type: selects the request CliInst constructor
+    struct wait_op_t {}; // tag type: selects the wait CliInst constructor
+    constexpr struct req_op_t req_op {}; // tag value to pass for a request instruction
+    constexpr struct wait_op_t wait_op {}; // tag value to pass for a wait instruction
+
+
+    enum class CliOp { req, wait }; // which kind of instruction a CliInst holds
+    // One client instruction; `op` tells which member of `args` is in use.
+    struct CliInst {
+      CliOp op;
+      union {
+	std::chrono::milliseconds wait_time;
+	struct {
+	  uint32_t count;
+	  std::chrono::microseconds time_bw_reqs;
+	  uint16_t max_outstanding;
+	} req_params;
+      } args;
+
+      // Wait instruction.  D is a duration type; it is stored as milliseconds.
+      template<typename D>
+      CliInst(wait_op_t, D duration) : op(CliOp::wait) {
+	using std::chrono::milliseconds;
+	args.wait_time = std::chrono::duration_cast<milliseconds>(duration);
+      }
+
+      // Request instruction; time_bw_reqs is 1/ops_per_sec in whole microseconds.
+      CliInst(req_op_t,
+	      uint32_t count, double ops_per_sec, uint16_t max_outstanding) :
+	op(CliOp::req)
+      {
+	const uint32_t gap_us = uint32_t(0.5 + 1.0 / ops_per_sec * 1000000);
+	args.req_params.count = count;
+	args.req_params.time_bw_reqs = std::chrono::microseconds(gap_us);
+	args.req_params.max_outstanding = max_outstanding;
+      }
+    };
+
+
+    using ServerSelectFunc = std::function<const ServerId&(uint64_t seed)>; // callable mapping a seed to a server id
+
+
+    template<typename SvcTrk, typename ReqPm, typename RespPm, typename Accum>
+    class SimulatedClient {
+    public:
+
+      struct InternalStats {
+	std::mutex mtx;
+	std::chrono::nanoseconds track_resp_time;
+	std::chrono::nanoseconds get_req_params_time;
+	uint32_t track_resp_count;
+	uint32_t get_req_params_count;
+
+	InternalStats() :
+	  track_resp_time(0),
+	  get_req_params_time(0),
+	  track_resp_count(0),
+	  get_req_params_count(0)
+	{
+	  // empty
+	}
+      };
+
+      using SubmitFunc =
+	std::function<void(const ServerId&,
+			   const TestRequest&,
+			   const ClientId&,
+			   const ReqPm&)>;
+
+      using ClientAccumFunc = std::function<void(Accum&,const RespPm&)>;
+
+      typedef std::chrono::time_point<std::chrono::steady_clock> TimePoint;
+
+      static TimePoint now() { return std::chrono::steady_clock::now(); }
+
+    protected:
+
+      struct RespQueueItem {
+	TestResponse response;
+	ServerId     server_id;
+	RespPm       resp_params;
+      };
+
+      const ClientId id;
+      const SubmitFunc submit_f;
+      const ServerSelectFunc server_select_f;
+      const ClientAccumFunc accum_f;
+
+      std::vector<CliInst> instructions;
+
+      SvcTrk service_tracker;
+
+      // TODO: use lock rather than atomic???
+      std::atomic_ulong        outstanding_ops;
+      std::atomic_bool         requests_complete;
+
+      std::deque<RespQueueItem> resp_queue;
+
+      std::mutex               mtx_req;
+      std::condition_variable  cv_req;
+
+      std::mutex               mtx_resp;
+      std::condition_variable  cv_resp;
+
+      using RespGuard = std::lock_guard<decltype(mtx_resp)>;
+      using Lock = std::unique_lock<std::mutex>;
+
+      // data collection
+
+      std::vector<TimePoint>   op_times;
+      Accum                    accumulator;
+      InternalStats            internal_stats;
+
+      std::thread              thd_req;
+      std::thread              thd_resp;
+
+    public:
+
+      SimulatedClient(ClientId _id,
+		      const SubmitFunc& _submit_f,
+		      const ServerSelectFunc& _server_select_f,
+		      const ClientAccumFunc& _accum_f,
+		      const std::vector<CliInst>& _instrs) :
+	id(_id),
+	submit_f(_submit_f),
+	server_select_f(_server_select_f),
+	accum_f(_accum_f),
+	instructions(_instrs),
+	service_tracker(),
+	outstanding_ops(0),
+	requests_complete(false)
+      {
+	size_t op_count = 0;
+	for (auto i : instructions) {
+	  if (CliOp::req == i.op) {
+	    op_count += i.args.req_params.count;
+	  }
+	}
+	op_times.reserve(op_count);
+
+	thd_resp = std::thread(&SimulatedClient::run_resp, this);
+	thd_req = std::thread(&SimulatedClient::run_req, this);
+      }
+
+
+      SimulatedClient(ClientId _id,
+		      const SubmitFunc& _submit_f,
+		      const ServerSelectFunc& _server_select_f,
+		      const ClientAccumFunc& _accum_f,
+		      uint16_t _ops_to_run,
+		      double _iops_goal,
+		      uint16_t _outstanding_ops_allowed) :
+	SimulatedClient(_id,
+			_submit_f, _server_select_f, _accum_f,
+			{{req_op, _ops_to_run, _iops_goal, _outstanding_ops_allowed}})
+      {
+	// empty
+      }
+
+
+      SimulatedClient(const SimulatedClient&) = delete;
+      SimulatedClient(SimulatedClient&&) = delete;
+      SimulatedClient& operator=(const SimulatedClient&) = delete;
+      SimulatedClient& operator=(SimulatedClient&&) = delete;
+
+      virtual ~SimulatedClient() {
+	wait_until_done();
+      }
+
+      void receive_response(const TestResponse& resp,
+			    const ServerId& server_id,
+			    const RespPm& resp_params) {
+	RespGuard g(mtx_resp);
+	resp_queue.push_back(RespQueueItem{resp, server_id, resp_params});
+	cv_resp.notify_one();
+      }
+
+      const std::vector<TimePoint>& get_op_times() const { return op_times; }
+
+      void wait_until_done() {
+	if (thd_req.joinable()) thd_req.join();
+	if (thd_resp.joinable()) thd_resp.join();
+      }
+
+      const Accum& get_accumulator() const { return accumulator; }
+
+      const InternalStats& get_internal_stats() const { return internal_stats; }
+
+    protected:
+
+      void run_req() {
+	size_t ops_count = 0;
+	for (auto i : instructions) {
+	  if (CliOp::wait == i.op) {
+	    std::this_thread::sleep_for(i.args.wait_time);
+	  } else if (CliOp::req == i.op) {
+	    Lock l(mtx_req);
+	    for (uint64_t o = 0; o < i.args.req_params.count; ++o) {
+	      while (outstanding_ops >= i.args.req_params.max_outstanding) {
+		cv_req.wait(l);
+	      }
+
+	      l.unlock();
+	      auto now = std::chrono::steady_clock::now();
+	      const ServerId& server = server_select_f(o);
+
+	      ReqPm rp =
+		time_stats_w_return<decltype(internal_stats.get_req_params_time),
+				    ReqPm>(internal_stats.mtx,
+					   internal_stats.get_req_params_time,
+					   [&]() -> ReqPm {
+					     return service_tracker.get_req_params(server);
+					   });
+	      count_stats(internal_stats.mtx,
+			  internal_stats.get_req_params_count);
+
+	      TestRequest req(server, o, 12);
+	      submit_f(server, req, id, rp);
+	      ++outstanding_ops;
+	      l.lock(); // lock for return to top of loop
+
+	      auto delay_time = now + i.args.req_params.time_bw_reqs;
+	      while (std::chrono::steady_clock::now() < delay_time) {
+		cv_req.wait_until(l, delay_time);
+	      } // while
+	    } // for
+	    ops_count += i.args.req_params.count;
+	  } else {
+	    assert(false);
+	  }
+	} // for loop
+
+	requests_complete = true;
+
+	// all requests made, thread ends
+      }
+
+
+      void run_resp() {
+	// drains resp_queue until all requests are issued and answered
+	std::chrono::milliseconds delay(1000);
+
+	Lock l(mtx_resp);
+
+	// since the following code would otherwise be repeated (except for
+	// the call to notify_one) in the two loops below; let's avoid
+	// repetition and define it once.
+	const auto proc_resp = [this, &l](const bool notify_req_cv) {
+	  if (!resp_queue.empty()) {
+	    RespQueueItem item = resp_queue.front();
+	    resp_queue.pop_front();
+
+	    l.unlock();
+
+	    // data collection
+
+	    op_times.push_back(now());
+	    accum_f(accumulator, item.resp_params);
+
+	    // processing
+
+	    time_stats(internal_stats.mtx,
+		       internal_stats.track_resp_time,
+		       [&](){
+			 service_tracker.track_resp(item.server_id, item.resp_params);
+		       });
+	    count_stats(internal_stats.mtx,
+			internal_stats.track_resp_count);
+
+	    // hold mtx_req for the decrement so run_req cannot miss the wakeup
+	    {
+	      std::lock_guard<std::mutex> req_guard(mtx_req);
+	      --outstanding_ops;
+	    }
+	    if (notify_req_cv) {
+	      cv_req.notify_one();
+	    }
+
+	    l.lock();
+	  }
+	};
+
+	while(!requests_complete.load()) {
+	  while(resp_queue.empty() && !requests_complete.load()) {
+	    cv_resp.wait_for(l, delay);
+	  }
+	  proc_resp(true);
+	}
+
+	while(outstanding_ops.load() > 0) {
+	  while(resp_queue.empty() && outstanding_ops.load() > 0) {
+	    cv_resp.wait_for(l, delay);
+	  }
+	  proc_resp(false); // don't call notify_one as all requests are complete
+	}
+
+	// all responses received, thread ends
+      }
+    }; // class SimulatedClient
+
+
+  }; // namespace qos_simulation
+}; // namespace crimson
diff --git a/sim/src/sim_recs.h b/sim/src/sim_recs.h
new file mode 100644
index 00000000000..b64750db4af
--- /dev/null
+++ b/sim/src/sim_recs.h
@@ -0,0 +1,121 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#pragma once
+
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <signal.h>
+
+#include <sys/time.h>
+
+#include <cmath>
+#include <limits>
+#include <string>
+#include <mutex>
+#include <iostream>
+
+
+using ClientId = uint;
+using ServerId = uint;
+
+
+namespace crimson {
+  namespace qos_simulation {
+
+    inline void debugger() {
+      raise(SIGCONT);
+    }
+
+    template<typename T>
+    void time_stats(std::mutex& mtx,
+		    T& time_accumulate,
+		    std::function<void()> code) {
+      auto t1 = std::chrono::steady_clock::now();
+      code();
+      auto t2 = std::chrono::steady_clock::now();
+      auto duration = t2 - t1;
+      auto cast_duration = std::chrono::duration_cast<T>(duration);
+      std::lock_guard<std::mutex> lock(mtx);
+      time_accumulate += cast_duration;
+    }
+
+    // unfortunately it's hard for the compiler to infer the types,
+    // and therefore when called the template params might have to be
+    // explicit
+    template<typename T, typename R>
+    R time_stats_w_return(std::mutex& mtx,
+			  T& time_accumulate,
+			  std::function<R()> code) {
+      auto t1 = std::chrono::steady_clock::now();
+      R result = code();
+      auto t2 = std::chrono::steady_clock::now();
+      auto duration = t2 - t1;
+      auto cast_duration = std::chrono::duration_cast<T>(duration);
+      std::lock_guard<std::mutex> lock(mtx);
+      time_accumulate += cast_duration;
+      return result;
+    }
+
+    template<typename T>
+    void count_stats(std::mutex& mtx,
+		     T& counter) {
+      std::lock_guard<std::mutex> lock(mtx);
+      ++counter;
+    }
+
+    struct TestRequest {
+      ServerId server; // allows debugging
+      uint32_t epoch;
+      uint32_t op;
+
+      TestRequest(ServerId _server,
+		  uint32_t _epoch,
+		  uint32_t _op) :
+	server(_server),
+	epoch(_epoch),
+	op(_op)
+      {
+	// empty
+      }
+
+      TestRequest(const TestRequest& r) :
+	TestRequest(r.server, r.epoch, r.op)
+      {
+	// empty
+      }
+    }; // struct TestRequest
+
+
+    struct TestResponse {
+      uint32_t epoch;
+
+      TestResponse(uint32_t _epoch) :
+	epoch(_epoch)
+      {
+	// empty
+      }
+
+      TestResponse(const TestResponse& r) :
+	epoch(r.epoch)
+      {
+	// empty
+      }
+
+      friend std::ostream& operator<<(std::ostream& out, const TestResponse& resp) {
+	out << "{ ";
+	out << "epoch:" << resp.epoch;
+	out << " }";
+	return out;
+      }
+    }; // class TestResponse
+
+  }; // namespace qos_simulation
+}; // namespace crimson
diff --git a/sim/src/sim_server.h b/sim/src/sim_server.h
new file mode 100644
index 00000000000..a61cc3204e4
--- /dev/null
+++ b/sim/src/sim_server.h
@@ -0,0 +1,225 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#pragma once
+
+
+#include <thread>
+#include <mutex>
+#include <condition_variable>
+#include <chrono>
+#include <deque>
+
+#include "sim_recs.h"
+
+
+namespace crimson {
+  namespace qos_simulation {
+
+    template<typename Q, typename ReqPm, typename RespPm, typename Accum>
+    class SimulatedServer {
+
+      struct QueueItem {
+	ClientId                     client;
+	std::unique_ptr<TestRequest> request;
+	RespPm                       additional;
+
+	QueueItem(const ClientId&                _client,
+		  std::unique_ptr<TestRequest>&& _request,
+		  const RespPm&                  _additional) :
+	  client(_client),
+	  request(std::move(_request)),
+	  additional(_additional)
+	{
+	  // empty
+	}
+      }; // QueueItem
+
+    public:
+
+      struct InternalStats {
+	std::mutex mtx;
+	std::chrono::nanoseconds add_request_time;
+	std::chrono::nanoseconds request_complete_time;
+	uint32_t add_request_count;
+	uint32_t request_complete_count;
+
+	InternalStats() :
+	  add_request_time(0),
+	  request_complete_time(0),
+	  add_request_count(0),
+	  request_complete_count(0)
+	{
+	  // empty
+	}
+      };
+
+      using ClientRespFunc = std::function<void(ClientId,
+						const TestResponse&,
+						const ServerId&,
+						const RespPm&)>;
+
+      using ServerAccumFunc = std::function<void(Accum& accumulator,
+						 const RespPm& additional)>;
+
+    protected:
+
+      const ServerId                 id;
+      Q*                             priority_queue;
+      ClientRespFunc                 client_resp_f;
+      int                            iops;
+      size_t                         thread_pool_size;
+
+      bool                           finishing;
+      std::chrono::microseconds      op_time;
+
+      std::mutex                     inner_queue_mtx;
+      std::condition_variable        inner_queue_cv;
+      std::deque<QueueItem>          inner_queue;
+
+      std::thread*                   threads;
+
+      using InnerQGuard = std::lock_guard<decltype(inner_queue_mtx)>;
+      using Lock = std::unique_lock<std::mutex>;
+
+      // data collection
+
+      ServerAccumFunc accum_f;
+      Accum accumulator;
+
+      InternalStats internal_stats;
+
+    public:
+
+      using CanHandleRequestFunc = std::function<bool(void)>;
+      using HandleRequestFunc =
+	std::function<void(const ClientId&,std::unique_ptr<TestRequest>,const RespPm&)>;
+      using CreateQueueF = std::function<Q*(CanHandleRequestFunc,HandleRequestFunc)>;
+					
+
+      SimulatedServer(ServerId _id,
+		      int _iops,
+		      size_t _thread_pool_size,
+		      const ClientRespFunc& _client_resp_f,
+		      const ServerAccumFunc& _accum_f,
+		      CreateQueueF _create_queue_f) :
+	id(_id),
+	priority_queue(_create_queue_f(std::bind(&SimulatedServer::has_avail_thread,
+						 this),
+				       std::bind(&SimulatedServer::inner_post,
+						 this,
+						 std::placeholders::_1,
+						 std::placeholders::_2,
+						 std::placeholders::_3))),
+	client_resp_f(_client_resp_f),
+	iops(_iops),
+	thread_pool_size(_thread_pool_size),
+	finishing(false),
+	accum_f(_accum_f)
+      {
+	op_time =
+	  std::chrono::microseconds((int) (0.5 +
+					   thread_pool_size * 1000000.0 / iops));
+	std::chrono::milliseconds delay(1000);
+	threads = new std::thread[thread_pool_size];
+	for (size_t i = 0; i < thread_pool_size; ++i) {
+	  threads[i] = std::thread(&SimulatedServer::run, this, delay);
+	}
+      }
+
+      virtual ~SimulatedServer() {
+	Lock l(inner_queue_mtx);
+	finishing = true;
+	inner_queue_cv.notify_all();
+	l.unlock();
+
+	for (size_t i = 0; i < thread_pool_size; ++i) {
+	  threads[i].join();
+	}
+
+	delete[] threads;
+      }
+
+      void post(const TestRequest& request,
+		const ClientId& client_id,
+		const ReqPm& req_params)
+      {
+	time_stats(internal_stats.mtx,
+		   internal_stats.add_request_time,
+		   [&](){
+		     priority_queue->add_request(request, client_id, req_params);
+		   });
+	count_stats(internal_stats.mtx,
+		    internal_stats.add_request_count);
+      }
+
+      bool has_avail_thread() {
+	InnerQGuard g(inner_queue_mtx);
+	return inner_queue.size() <= thread_pool_size;
+      }
+
+      const Accum& get_accumulator() const { return accumulator; }
+      const Q& get_priority_queue() const { return *priority_queue; }
+      const InternalStats& get_internal_stats() const { return internal_stats; }
+
+    protected:
+
+      void inner_post(const ClientId& client,
+		      std::unique_ptr<TestRequest> request,
+		      const RespPm& additional) {
+	Lock l(inner_queue_mtx);
+	assert(!finishing);
+	accum_f(accumulator, additional);
+	inner_queue.emplace_back(QueueItem(client,
+					   std::move(request),
+					   additional));
+	inner_queue_cv.notify_one();
+      }
+
+      void run(std::chrono::milliseconds check_period) {
+	Lock l(inner_queue_mtx);
+	while(true) {
+	  while(inner_queue.empty() && !finishing) {
+	    inner_queue_cv.wait_for(l, check_period);
+	  }
+	  if (!inner_queue.empty()) {
+	    auto& front = inner_queue.front();
+	    auto client = front.client;
+	    auto req = std::move(front.request);
+	    auto additional = front.additional;
+	    inner_queue.pop_front();
+
+	    l.unlock();
+
+	    // simulation operation by sleeping; then call function to
+	    // notify server of completion
+	    std::this_thread::sleep_for(op_time);
+
+	    TestResponse resp(req->epoch);
+	    // TODO: rather than assuming this constructor exists, perhaps
+	    // pass in a function that does this mapping?
+	    client_resp_f(client, resp, id, additional);
+
+	    time_stats(internal_stats.mtx,
+		       internal_stats.request_complete_time,
+		       [&](){
+			 priority_queue->request_completed();
+		       });
+	    count_stats(internal_stats.mtx,
+			internal_stats.request_complete_count);
+
+	    l.lock(); // in prep for next iteration of loop
+	  } else {
+	    break;
+	  }
+	}
+      }
+    }; // class SimulatedServer
+
+  }; // namespace qos_simulation
+}; // namespace crimson
diff --git a/sim/src/simulate.h b/sim/src/simulate.h
new file mode 100644
index 00000000000..18e752d8a35
--- /dev/null
+++ b/sim/src/simulate.h
@@ -0,0 +1,430 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#pragma once
+
+
+#include <assert.h>
+
+#include <memory>
+#include <chrono>
+#include <map>
+#include <random>
+#include <iostream>
+#include <iomanip>
+#include <string>
+
+
+namespace crimson {
+  namespace qos_simulation {
+
+    template<typename ServerId, typename ClientId, typename TS, typename TC>
+    class Simulation {
+  
+    public:
+
+      using TimePoint = std::chrono::time_point<std::chrono::steady_clock>;
+
+    protected:
+
+      using ClientMap = std::map<ClientId,TC*>;
+      using ServerMap = std::map<ServerId,TS*>;
+
+      uint server_count = 0;
+      uint client_count = 0;
+
+      ServerMap servers;
+      ClientMap clients;
+      std::vector<ServerId> server_ids;
+
+      TimePoint early_time;
+      TimePoint servers_created_time;
+      TimePoint clients_created_time;
+      TimePoint clients_finished_time;
+      TimePoint late_time;
+
+      std::default_random_engine prng;
+
+      bool has_run = false;
+
+
+    public:
+
+      double fmt_tp(const TimePoint& t) {
+	auto c = t.time_since_epoch().count();
+	return uint64_t(c / 1000000.0 + 0.5) % 100000 / 1000.0;
+      }
+
+      TimePoint now() {
+	return std::chrono::steady_clock::now();
+      }
+
+      using ClientBasedServerSelectFunc =
+	std::function<const ServerId&(uint64_t, uint16_t)>;
+
+      using ClientFilter = std::function<bool(const ClientId&)>;
+
+      using ServerFilter = std::function<bool(const ServerId&)>;
+
+      using ServerDataOutF =
+	std::function<void(std::ostream& out,
+			   Simulation* sim, ServerFilter,
+			   int header_w, int data_w, int data_prec)>;
+
+      using ClientDataOutF =
+	std::function<void(std::ostream& out,
+			   Simulation* sim, ClientFilter,
+			   int header_w, int data_w, int data_prec)>;
+
+      Simulation() :
+	early_time(now()),
+	prng(std::chrono::system_clock::now().time_since_epoch().count())
+      {
+	// empty
+      }
+
+      uint get_client_count() const { return client_count; }
+      uint get_server_count() const { return server_count; }
+      TC& get_client(ClientId id) { return *clients[id]; }
+      TS& get_server(ServerId id) { return *servers[id]; }
+      const ServerId& get_server_id(uint index) const {
+	return server_ids[index];
+      }
+
+
+      void add_servers(uint count,
+		       std::function<TS*(ServerId)> create_server_f) {
+	uint i = server_count;
+
+	// increment server_count before creating servers since they
+	// will start running immediately and may use the server_count
+	// value; NB: this could still be an issue if servers are
+	// added with multiple add_servers calls; consider using a
+	// separate start function after all servers (and clients?)
+	// have been added
+	server_count += count;
+
+	for (; i < server_count; ++i) {
+	  server_ids.push_back(i);
+	  servers[i] = create_server_f(i);
+	}
+
+	servers_created_time = now();
+      }
+
+
+      void add_clients(uint count,
+		       std::function<TC*(ClientId)> create_client_f) {
+	uint i = client_count;
+
+	// increment client_count before creating clients since they
+	// will start running immediately and may use the client_count
+	// value (e.g., in the server selection function); NB: this could
+	// still be an issue if clients are added with multiple
+	// add_clients calls; consider using a separate start function
+	// after all clients have been added
+	client_count += count;
+
+	for (; i < client_count; ++i) {
+	  clients[i] = create_client_f(i);
+	}
+
+	clients_created_time = now();
+      }
+
+
+      void run() {
+	assert(server_count > 0);
+	assert(client_count > 0);
+
+	std::cout << "simulation started" << std::endl;
+
+	// clients are now running; wait for all to finish
+
+	for (auto const &i : clients) {
+	  i.second->wait_until_done();
+	}
+
+	late_time = clients_finished_time = now();
+
+	std::cout << "simulation completed in " <<
+	  std::chrono::duration_cast<std::chrono::milliseconds>(clients_finished_time - servers_created_time).count() <<
+	  " millisecs" << std::endl;
+
+	has_run = true;
+      } // run
+
+
+      void display_stats(std::ostream& out,
+			 ServerDataOutF server_out_f, ClientDataOutF client_out_f,
+			 ServerFilter server_filter =
+			 [] (const ServerId&) { return true; },
+			 ClientFilter client_filter =
+			 [] (const ClientId&) { return true; },
+			 int head_w = 12, int data_w = 7, int data_prec = 2) {
+	// report client op rates plus client/server stats; requires run()
+	assert(has_run);
+
+	// amount of initial data to skip (currently none)
+	const std::chrono::seconds skip_amount(0);
+	// calculate in groups of measure_unit (currently 2 seconds)
+	const std::chrono::seconds measure_unit(2);
+	// unit to output reports in
+	const std::chrono::seconds report_unit(1);
+
+	// compute and display stats
+
+	// latest_finish is the last response time recorded by any client
+	// and bounds the measurement windows below; a client without any
+	// responses is skipped because back() on an empty vector is undefined
+	TimePoint latest_finish = early_time;
+
+	for (auto const &c : clients) {
+	  const auto& times = c.second->get_op_times();
+	  if (times.empty()) { continue; }
+
+	  if (times.back() > latest_finish) {
+	    latest_finish = times.back();
+	  }
+	}
+
+	double ops_factor =
+	  std::chrono::duration_cast<std::chrono::duration<double>>(measure_unit) /
+	  std::chrono::duration_cast<std::chrono::duration<double>>(report_unit);
+
+	const auto start_edge = clients_created_time + skip_amount;
+
+	std::map<ClientId,std::vector<double>> ops_data;
+
+	for (auto const &c : clients) {
+	  auto it = c.second->get_op_times().begin();
+	  const auto end = c.second->get_op_times().end();
+	  while (it != end && *it < start_edge) { ++it; }
+
+	  for (auto time_edge = start_edge + measure_unit;
+	       time_edge <= latest_finish + measure_unit;
+	       time_edge += measure_unit) {
+	    int count = 0;
+	    for (; it != end && *it < time_edge; ++count, ++it) { /* empty */ }
+	    double ops_per_second = double(count) / ops_factor;
+	    ops_data[c.first].push_back(ops_per_second);
+	  }
+	}
+
+	out << "==== Client Data ====" << std::endl;
+
+	out << std::setw(head_w) << "client:";
+	for (auto const &c : clients) {
+	  if (!client_filter(c.first)) continue;
+	  out << " " << std::setw(data_w) << c.first;
+	}
+	out << " " << std::setw(data_w) << "total" << std::endl;
+
+	{
+	  bool has_data;
+	  size_t i = 0;
+	  do {
+	    std::string line_header = "t_" + std::to_string(i) + ":";
+	    out << std::setw(head_w) << line_header;
+	    has_data = false;
+	    double total = 0.0;
+	    for (auto const &c : clients) {
+	      double data = 0.0;
+	      if (i < ops_data[c.first].size()) {
+		data = ops_data[c.first][i];
+		has_data = true;
+	      }
+	      total += data;
+
+	      if (!client_filter(c.first)) continue;
+
+	      out << " " << std::setw(data_w) << std::setprecision(data_prec) <<
+		std::fixed << data;
+	    }
+	    out << " " << std::setw(data_w) << std::setprecision(data_prec) <<
+	      std::fixed << total << std::endl;
+	    ++i;
+	  } while(has_data);
+	}
+
+	client_out_f(out, this, client_filter, head_w, data_w, data_prec);
+
+	display_client_internal_stats<std::chrono::nanoseconds>(out,
+								"nanoseconds");
+
+	out << std::endl << "==== Server Data ====" << std::endl;
+
+	out << std::setw(head_w) << "server:";
+	for (auto const &s : servers) {
+	  if (!server_filter(s.first)) continue;
+	  out << " " << std::setw(data_w) << s.first;
+	}
+	out << " " << std::setw(data_w) << "total" << std::endl;
+
+	server_out_f(out, this, server_filter, head_w, data_w, data_prec);
+
+	display_server_internal_stats<std::chrono::nanoseconds>(out,
+								"nanoseconds");
+
+	// clean up clients then servers (both maps hold nullptr afterwards)
+
+	for (auto i = clients.begin(); i != clients.end(); ++i) {
+	  delete i->second;
+	  i->second = nullptr;
+	}
+
+	for (auto i = servers.begin(); i != servers.end(); ++i) {
+	  delete i->second;
+	  i->second = nullptr;
+	}
+      } // display_stats
+
+
+      template<typename T>
+      void display_server_internal_stats(std::ostream& out,
+					 std::string time_unit) {
+	T add_request_time(0);
+	T request_complete_time(0);
+	uint32_t add_request_count = 0;
+	uint32_t request_complete_count = 0;
+
+	for (uint i = 0; i < get_server_count(); ++i) {
+	  const auto& server = get_server(i);
+	  const auto& is = server.get_internal_stats();
+	  add_request_time +=
+	    std::chrono::duration_cast<T>(is.add_request_time);
+	  request_complete_time +=
+	    std::chrono::duration_cast<T>(is.request_complete_time);
+	  add_request_count += is.add_request_count;
+	  request_complete_count += is.request_complete_count;
+	}
+
+	double add_request_time_per_unit =
+	  double(add_request_time.count()) / add_request_count ;
+	out << "total time to add requests: " <<
+	  std::fixed << add_request_time.count() << " " << time_unit <<
+	  ";" << std::endl <<
+	  "    count: " << add_request_count << ";" << std::endl <<
+	  "    average: " << add_request_time_per_unit <<
+	  " " << time_unit << " per request/response" << std::endl;
+
+	double request_complete_time_unit =
+	  double(request_complete_time.count()) / request_complete_count ;
+	out << "total time to note requests complete: " << std::fixed <<
+	  request_complete_time.count() << " " << time_unit << ";" <<
+	  std::endl << 
+	  "    count: " << request_complete_count << ";" << std::endl <<
+	  "    average: " << request_complete_time_unit <<
+	  " " << time_unit << " per request/response" << std::endl;
+
+	out << std::endl;
+
+	assert(add_request_count == request_complete_count);
+	out << "server timing for QOS algorithm: " <<
+	  add_request_time_per_unit + request_complete_time_unit <<
+	  " " << time_unit << " per request/response" << std::endl;
+      }
+
+
+      template<typename T>
+      void display_client_internal_stats(std::ostream& out,
+					 std::string time_unit) {
+	T track_resp_time(0);
+	T get_req_params_time(0);
+	uint32_t track_resp_count = 0;
+	uint32_t get_req_params_count = 0;
+
+	for (uint i = 0; i < get_client_count(); ++i) {
+	  const auto& client = get_client(i);
+	  const auto& is = client.get_internal_stats();
+	  track_resp_time +=
+	    std::chrono::duration_cast<T>(is.track_resp_time);
+	  get_req_params_time +=
+	    std::chrono::duration_cast<T>(is.get_req_params_time);
+	  track_resp_count += is.track_resp_count;
+	  get_req_params_count += is.get_req_params_count;
+	}
+
+	double track_resp_time_unit =
+	  double(track_resp_time.count()) / track_resp_count;
+	out << "total time to track responses: " <<
+	  std::fixed << track_resp_time.count() << " " << time_unit << ";" <<
+	  std::endl <<
+	  "    count: " << track_resp_count << ";" << std::endl <<
+	  "    average: " << track_resp_time_unit << " " << time_unit <<
+	  " per request/response" << std::endl;
+
+	double get_req_params_time_unit =
+	  double(get_req_params_time.count()) / get_req_params_count;
+	out << "total time to get request parameters: " <<
+	  std::fixed << get_req_params_time.count() << " " << time_unit <<
+	  ";" << std::endl <<
+	  "    count: " << get_req_params_count << ";" << std::endl <<
+	  "    average: " << get_req_params_time_unit << " " << time_unit <<
+	  " per request/response" << std::endl;
+
+	out << std::endl;
+
+	assert(track_resp_count == get_req_params_count);
+	out << "client timing for QOS algorithm: " <<
+	  track_resp_time_unit + get_req_params_time_unit << " " <<
+	  time_unit << " per request/response" << std::endl;
+      }
+
+
+      // **** server selection functions ****
+
+
+      const ServerId& server_select_alternate(uint64_t seed,
+					      uint16_t client_idx) {
+	uint index = (client_idx + seed) % server_count;
+	return server_ids[index];
+      }
+
+
+      // returns a lambda using the range specified as servers_per (client)
+      ClientBasedServerSelectFunc
+      make_server_select_alt_range(uint16_t servers_per) {
+	return [servers_per,this](uint64_t seed, uint16_t client_idx)
+	  -> const ServerId& {
+	  double factor = double(server_count) / client_count;
+	  uint offset = seed % servers_per;
+	  uint index = (uint(0.5 + client_idx * factor) + offset) % server_count;
+	  return server_ids[index];
+	};
+      }
+
+
+      // function to choose a server randomly
+      const ServerId& server_select_random(uint64_t seed, uint16_t client_idx) {
+	uint index = prng() % server_count;
+	return server_ids[index];
+      }
+
+  
+      // function to choose a server randomly
+      ClientBasedServerSelectFunc
+      make_server_select_ran_range(uint16_t servers_per) {
+	return [servers_per,this](uint64_t seed, uint16_t client_idx)
+	  -> const ServerId& {
+	  double factor = double(server_count) / client_count;
+	  uint offset = prng() % servers_per;
+	  uint index = (uint(0.5 + client_idx * factor) + offset) % server_count;
+	  return server_ids[index];
+	};
+      }
+
+
+      // function to always choose the first server
+      const ServerId& server_select_0(uint64_t seed, uint16_t client_idx) {
+	return server_ids[0];
+      }
+    }; // class Simulation
+
+  }; // namespace qos_simulation
+}; // namespace crimson
diff --git a/sim/src/ssched/ssched_client.h b/sim/src/ssched/ssched_client.h
new file mode 100644
index 00000000000..dcbe0771de5
--- /dev/null
+++ b/sim/src/ssched/ssched_client.h
@@ -0,0 +1,44 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#pragma once
+
+#include "ssched_recs.h"
+
+
+namespace crimson {
+  namespace simple_scheduler {
+
+    // S is server identifier type
+    template<typename S>
+    class ServiceTracker {
+
+    public:
+
+      // the simple scheduler keeps no per-server state, so there is
+      // nothing to initialize
+      ServiceTracker()
+      {
+	// empty
+      }
+
+
+      void track_resp(const S& server_id, const NullData& ignore) {
+	// empty
+      }
+
+
+      /*
+       * Returns a default-constructed ReqParams; server is unused.
+       */
+      ReqParams get_req_params(const S& server) {
+	return ReqParams();
+      } // get_req_params
+    }; // class ServiceTracker
+  } // namespace simple_scheduler
+} // namespace crimson
diff --git a/sim/src/ssched/ssched_recs.h b/sim/src/ssched/ssched_recs.h
new file mode 100644
index 00000000000..3332d5a4933
--- /dev/null
+++ b/sim/src/ssched/ssched_recs.h
@@ -0,0 +1,37 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#pragma once
+
+
+#include <ostream>
+#include <assert.h>
+
+
+namespace crimson {
+  namespace simple_scheduler {
+
+    // since we send no additional data out
+    // NOTE: Change name to RespParams? Is it used elsewhere?
+    struct NullData {
+      friend std::ostream& operator<<(std::ostream& out, const NullData& n) {
+	out << "NullData{ EMPTY }";
+	return out;
+      }
+    }; // struct NullData
+
+
+    struct ReqParams {
+      friend std::ostream& operator<<(std::ostream& out, const ReqParams& rp) {
+	out << "ReqParams{ EMPTY }";
+	return out;
+      }
+    };
+
+  }
+}
diff --git a/sim/src/ssched/ssched_server.h b/sim/src/ssched/ssched_server.h
new file mode 100644
index 00000000000..ee4c1e6e3ef
--- /dev/null
+++ b/sim/src/ssched/ssched_server.h
@@ -0,0 +1,182 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+#pragma once
+
+#include <memory>
+#include <mutex>
+#include <deque>
+
+#include "boost/variant.hpp"
+
+#include "ssched_recs.h"
+
+#ifdef PROFILE
+#include "profile.h"
+#endif
+
+namespace crimson {
+
+  namespace simple_scheduler {
+
+    template<typename C, typename R, typename Time>
+    class SimpleQueue {
+
+    public:
+
+      using RequestRef = std::unique_ptr<R>;
+
+      // a function to see whether the server can handle another request
+      using CanHandleRequestFunc = std::function<bool(void)>;
+
+      // a function to submit a request to the server; it receives the
+      // client, the request (ownership is transferred), and a NullData
+      using HandleRequestFunc =
+	std::function<void(const C&,RequestRef,NullData)>;
+
+      struct PullReq {
+	enum class Type { returning, none };
+
+	struct Retn {
+	  C           client;
+	  RequestRef  request;
+	};
+
+	Type                 type;
+	boost::variant<Retn> data;
+      };
+
+    protected:
+
+      enum class Mechanism { push, pull };
+
+      struct QRequest {
+	C          client;
+	RequestRef request;
+      };
+
+      bool finishing = false;
+      Mechanism mechanism;
+
+      CanHandleRequestFunc can_handle_f;
+      HandleRequestFunc handle_f;
+
+      mutable std::mutex queue_mtx;
+      using DataGuard = std::lock_guard<decltype(queue_mtx)>;
+
+      std::deque<QRequest> queue;
+
+#ifdef PROFILE
+    public:
+      ProfileTimer<std::chrono::nanoseconds> pull_request_timer;
+      ProfileTimer<std::chrono::nanoseconds> add_request_timer;
+      ProfileTimer<std::chrono::nanoseconds> request_complete_timer;
+    protected:
+#endif
+
+    public:
+
+      // push full constructor
+      SimpleQueue(CanHandleRequestFunc _can_handle_f,
+		  HandleRequestFunc _handle_f) :
+	mechanism(Mechanism::push),
+	can_handle_f(_can_handle_f),
+	handle_f(_handle_f)
+      {
+	// empty
+      }
+
+      SimpleQueue() :
+	mechanism(Mechanism::pull)
+      {
+	// empty
+      }
+
+      ~SimpleQueue() {
+	finishing = true;
+      }
+
+      void add_request(const R& request,
+		       const C& client_id,
+		       const ReqParams& req_params) {
+	add_request(RequestRef(new R(request)), client_id, req_params);
+      }
+
+      void add_request(RequestRef&& request,
+		       const C& client_id,
+		       const ReqParams& req_params) {
+	DataGuard g(queue_mtx);
+
+#ifdef PROFILE
+	add_request_timer.start();
+#endif
+	queue.emplace_back(QRequest{client_id, std::move(request)});
+
+	if (Mechanism::push == mechanism) {
+	  schedule_request();
+	}
+
+#ifdef PROFILE
+	add_request_timer.stop();
+#endif
+      } // add_request
+
+      void request_completed() {
+	assert(Mechanism::push == mechanism);
+	DataGuard g(queue_mtx);
+
+#ifdef PROFILE
+	request_complete_timer.start();
+#endif
+	schedule_request();
+
+#ifdef PROFILE
+	request_complete_timer.stop();
+#endif
+      } // request_completed
+
+      PullReq pull_request() {
+	// pull mode only: hands back the oldest queued request, or type none
+	assert(Mechanism::pull == mechanism);
+	PullReq result;
+	DataGuard g(queue_mtx);
+#ifdef PROFILE
+	pull_request_timer.start();
+#endif
+
+	if (queue.empty()) {
+	  result.type = PullReq::Type::none;
+	} else {
+	  // bind by reference; QRequest holds a unique_ptr and is move-only
+	  auto& front = queue.front();
+	  result.type = PullReq::Type::returning;
+	  result.data =
+	    typename PullReq::Retn{front.client, std::move(front.request)};
+	  queue.pop_front();
+	}
+#ifdef PROFILE
+	pull_request_timer.stop();
+#endif
+
+	return result;
+      }
+
+    protected:
+
+      // queue_mtx should be held when called; should only be called
+      // when mechanism is push
+      void schedule_request() {
+	if (!queue.empty() && can_handle_f()) {
+	  auto& front = queue.front();
+	  static NullData null_data;
+	  handle_f(front.client, std::move(front.request), null_data);
+	  queue.pop_front();
+	}
+      }
+    };
+  };
+};
diff --git a/sim/src/str_list.cc b/sim/src/str_list.cc
new file mode 100644
index 00000000000..22109e00840
--- /dev/null
+++ b/sim/src/str_list.cc
@@ -0,0 +1,106 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2009-2010 Dreamhost
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include "str_list.h"
+
+using std::string;
+using std::vector;
+using std::set;
+using std::list;
+
+// Store the next token of s (at or after pos, split on delims) in token and
+// advance pos past it; returns false, with pos = s.size(), when none is left.
+static bool get_next_token(const std::string &s, size_t& pos, const char *delims, std::string& token)
+{
+  // offsets stay size_t: narrowing to int breaks on strings beyond INT_MAX
+  const size_t start = s.find_first_not_of(delims, pos);
+  if (start == std::string::npos) {
+    pos = s.size();
+    return false;
+  }
+  size_t end = s.find_first_of(delims, start);
+  if (end == std::string::npos) {
+    pos = end = s.size();  // token runs to the end of the string
+  } else {
+    pos = end + 1;         // resume just after the delimiter
+  }
+
+  token = s.substr(start, end - start);
+  return true;
+}
+
+// Fill str_list with the tokens of str, split on any character in delims;
+// previous contents are discarded and empty tokens are never stored.
+void get_str_list(const string& str, const char *delims, list<string>& str_list)
+{
+  str_list.clear();
+
+  string piece;
+  for (size_t cursor = 0; cursor < str.size(); ) {
+    const bool found = get_next_token(str, cursor, delims, piece);
+    if (!found || piece.empty()) {
+      continue;
+    }
+    str_list.push_back(piece);
+  }
+}
+
+void get_str_list(const string& str, list<string>& str_list)
+{
+  // split on the default delimiter set: ';', ',', '=', space and tab
+  get_str_list(str, ";,= \t", str_list);
+}
+
+// Fill str_vec with the tokens of str, split on any character in delims;
+// previous contents are discarded and empty tokens are never stored.
+void get_str_vec(const string& str, const char *delims, vector<string>& str_vec)
+{
+  str_vec.clear();
+  string piece;
+  for (size_t cursor = 0; cursor < str.size(); ) {
+    const bool found = get_next_token(str, cursor, delims, piece);
+    if (!found || piece.empty()) {
+      continue;
+    }
+    str_vec.push_back(piece);
+  }
+}
+
+void get_str_vec(const string& str, vector<string>& str_vec)
+{
+  // split on the default delimiter set: ';', ',', '=', space and tab
+  get_str_vec(str, ";,= \t", str_vec);
+}
+
+// Fill str_set with the tokens of str, split on any character in delims;
+// previous contents are discarded and empty tokens are never stored.
+void get_str_set(const string& str, const char *delims, set<string>& str_set)
+{
+  str_set.clear();
+
+  string piece;
+  for (size_t cursor = 0; cursor < str.size(); ) {
+    const bool found = get_next_token(str, cursor, delims, piece);
+    if (!found || piece.empty()) {
+      continue;
+    }
+    str_set.insert(piece);
+  }
+}
+
+void get_str_set(const string& str, set<string>& str_set)
+{
+  // split on the default delimiter set: ';', ',', '=', space and tab
+  get_str_set(str, ";,= \t", str_set);
+}
diff --git a/sim/src/str_list.h b/sim/src/str_list.h
new file mode 100644
index 00000000000..4ba0cadd960
--- /dev/null
+++ b/sim/src/str_list.h
@@ -0,0 +1,94 @@
+#ifndef CEPH_STRLIST_H
+#define CEPH_STRLIST_H
+
+#include <list>
+#include <set>
+#include <sstream>
+#include <string>
+#include <vector>
+
+/**
+ * Split **str** into a list of strings, using the ";,= \t" delimiters and output the result in **str_list**.
+ * 
+ * @param [in] str String to split and save as list
+ * @param [out] str_list List modified containing str after it has been split
+**/
+extern void get_str_list(const std::string& str,
+			 std::list<std::string>& str_list);
+
+/**
+ * Split **str** into a list of strings, using the **delims** delimiters and output the result in **str_list**.
+ * 
+ * @param [in] str String to split and save as list
+ * @param [in] delims characters used to split **str**
+ * @param [out] str_list List modified containing str after it has been split
+**/
+extern void get_str_list(const std::string& str,
+                         const char *delims,
+			 std::list<std::string>& str_list);
+
+/**
+ * Split **str** into a vector of strings, using the ";,= \t" delimiters and output the result in **str_vec**.
+ * 
+ * @param [in] str String to split and save as Vector
+ * @param [out] str_vec Vector modified containing str after it has been split
+**/
+extern void get_str_vec(const std::string& str,
+			 std::vector<std::string>& str_vec);
+
+/**
+ * Split **str** into a vector of strings, using the **delims** delimiters and output the result in **str_vec**.
+ * 
+ * @param [in] str String to split and save as Vector
+ * @param [in] delims characters used to split **str**
+ * @param [out] str_vec Vector modified containing str after it has been split
+**/
+extern void get_str_vec(const std::string& str,
+                         const char *delims,
+			 std::vector<std::string>& str_vec);
+
+/**
+ * Split **str** into a set of strings, using the ";,= \t" delimiters and output the result in **str_list**.
+ * 
+ * @param [in] str String to split and save as Set
+ * @param [out] str_list Set modified containing str after it has been split
+**/
+extern void get_str_set(const std::string& str,
+			std::set<std::string>& str_list);
+
+/**
+ * Split **str** into a set of strings, using the **delims** delimiters and output the result in **str_list**.
+ * 
+ * @param [in] str String to split and save as Set
+ * @param [in] delims characters used to split **str**
+ * @param [out] str_list Set modified containing str after it has been split
+**/
+extern void get_str_set(const std::string& str,
+                        const char *delims,
+			std::set<std::string>& str_list);
+
+/**
+ * Return a String containing the vector **v** joined with **sep**
+ * 
+ * If **v** is empty, the function returns an empty string
+ * For each element in **v**,
+ * it will concatenate this element and **sep** with result
+ * 
+ * @param [in] v Vector to join as a String
+ * @param [in] sep String used to join each element from **v**
+ * @return empty string if **v** is empty or concatenated string
+**/
+inline std::string str_join(const std::vector<std::string>& v, std::string sep)
+{
+  std::string joined;
+  bool first = true;
+  for (const std::string& piece : v) {
+    if (!first)
+      joined += sep;  // separator goes between elements only
+    joined += piece;
+    first = false;
+  }
+  return joined;
+}
+
+#endif
diff --git a/sim/src/test_dmclock.cc b/sim/src/test_dmclock.cc
new file mode 100644
index 00000000000..8e7aa4ab219
--- /dev/null
+++ b/sim/src/test_dmclock.cc
@@ -0,0 +1,40 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#include "dmclock_recs.h"
+#include "dmclock_server.h"
+#include "dmclock_client.h"
+
+#include "sim_recs.h"
+#include "sim_server.h"
+#include "sim_client.h"
+
+#include "test_dmclock.h"
+
+
+namespace test = crimson::test_dmc;
+
+
+// Bump the counter of a that matches phase: reservation, else proportion.
+void test::dmc_server_accumulate_f(test::DmcAccum& a,
+				   const test::dmc::PhaseType& phase) {
+  const bool via_reservation = test::dmc::PhaseType::reservation == phase;
+  auto& counter =
+    via_reservation ? a.reservation_count : a.proportion_count;
+  ++counter;
+}
+
+
+// Bump the counter of a that matches phase.
+void test::dmc_client_accumulate_f(test::DmcAccum& a,
+				   const test::dmc::PhaseType& phase) {
+  // anything other than the reservation phase counts as proportion
+  const bool via_reservation =
+    test::dmc::PhaseType::reservation == phase;
+  ++(via_reservation ? a.reservation_count : a.proportion_count);
+}
diff --git a/sim/src/test_dmclock.h b/sim/src/test_dmclock.h
new file mode 100644
index 00000000000..7f1e55439ed
--- /dev/null
+++ b/sim/src/test_dmclock.h
@@ -0,0 +1,56 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#include "dmclock_recs.h"
+#include "dmclock_server.h"
+#include "dmclock_client.h"
+
+#include "sim_recs.h"
+#include "sim_server.h"
+#include "sim_client.h"
+
+#include "simulate.h"
+
+
+namespace crimson {
+  namespace test_dmc {
+    
+    namespace dmc = crimson::dmclock;
+    namespace sim = crimson::qos_simulation;
+
+    struct DmcAccum {
+      uint64_t reservation_count = 0;
+      uint64_t proportion_count = 0;
+    };
+
+    using DmcQueue = dmc::PushPriorityQueue<ClientId,sim::TestRequest>;
+
+    using DmcServer = sim::SimulatedServer<DmcQueue,
+					   dmc::ReqParams,
+					   dmc::PhaseType,
+					   DmcAccum>;
+
+    using DmcClient = sim::SimulatedClient<dmc::ServiceTracker<ServerId>,
+					   dmc::ReqParams,
+					   dmc::PhaseType,
+					   DmcAccum>;
+
+    using CreateQueueF = std::function<DmcQueue*(DmcQueue::CanHandleRequestFunc,
+						 DmcQueue::HandleRequestFunc)>;
+
+    using MySim = sim::Simulation<ServerId,ClientId,DmcServer,DmcClient>;
+
+    using SubmitFunc = DmcClient::SubmitFunc;
+
+    extern void dmc_server_accumulate_f(DmcAccum& a,
+					const dmc::PhaseType& phase);
+
+    extern void dmc_client_accumulate_f(DmcAccum& a,
+					const dmc::PhaseType& phase);
+  } // namespace test_dmc
+} // namespace crimson
diff --git a/sim/src/test_dmclock_main.cc b/sim/src/test_dmclock_main.cc
new file mode 100644
index 00000000000..c3ba1e18fbd
--- /dev/null
+++ b/sim/src/test_dmclock_main.cc
@@ -0,0 +1,322 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#include "test_dmclock.h"
+#include "config.h"
+
+#ifdef PROFILE
+#include "profile.h"
+#endif
+
+
+namespace dmc = crimson::dmclock;
+namespace test = crimson::test_dmc;
+namespace sim = crimson::qos_simulation;
+
+using namespace std::placeholders;
+
+
+namespace crimson {
+    namespace test_dmc {
+        void server_data(std::ostream& out,
+                         test::MySim* sim,
+                         test::MySim::ServerFilter server_disp_filter,
+                         int head_w, int data_w, int data_prec);
+
+        void client_data(std::ostream& out,
+                         test::MySim* sim,
+                         test::MySim::ClientFilter client_disp_filter,
+                         int head_w, int data_w, int data_prec);
+    }
+}
+
+
+int main(int argc, char* argv[]) {
+    std::vector<const char*> args;
+    for (int i = 1; i < argc; ++i) {
+      args.push_back(argv[i]);
+    }
+
+    std::string conf_file_list;
+    sim::ceph_argparse_early_args(args, &conf_file_list);
+
+    sim::sim_config_t g_conf;
+    std::vector<sim::cli_group_t> &cli_group = g_conf.cli_group;
+    std::vector<sim::srv_group_t> &srv_group = g_conf.srv_group;
+
+    if (!conf_file_list.empty()) {
+      int ret;
+      ret = sim::parse_config_file(conf_file_list, g_conf);
+      if (ret) {
+	// error
+	_exit(1);
+      }
+    } else {
+      // default simulation parameter
+      g_conf.client_groups = 2;
+
+      sim::srv_group_t st;
+      srv_group.push_back(st);
+
+      sim::cli_group_t ct1(99, 0);
+      cli_group.push_back(ct1);
+
+      sim::cli_group_t ct2(1, 10);
+      cli_group.push_back(ct2);
+    }
+
+    const uint server_groups = g_conf.server_groups;
+    const uint client_groups = g_conf.client_groups;
+    const bool server_random_selection = g_conf.server_random_selection;
+    const bool server_soft_limit = g_conf.server_soft_limit;
+    uint server_total_count = 0;
+    uint client_total_count = 0;
+
+    for (uint i = 0; i < client_groups; ++i) {
+      client_total_count += cli_group[i].client_count;
+    }
+
+    for (uint i = 0; i < server_groups; ++i) {
+      server_total_count += srv_group[i].server_count;
+    }
+
+    std::vector<test::dmc::ClientInfo> client_info;
+    for (uint i = 0; i < client_groups; ++i) {
+      client_info.push_back(test::dmc::ClientInfo 
+			  { cli_group[i].client_reservation,
+			    cli_group[i].client_weight,
+			    cli_group[i].client_limit } );
+    }
+
+    auto ret_client_group_f = [&](const ClientId& c) -> uint {
+      uint group_max = 0;
+      uint i = 0;
+      for (; i < client_groups; ++i) {
+	group_max += cli_group[i].client_count;
+	if (c < group_max) {
+	  break;
+	}
+      }
+      return i;
+    };
+
+    auto ret_server_group_f = [&](const ServerId& s) -> uint {
+      uint group_max = 0;
+      uint i = 0;
+      for (; i < server_groups; ++i) {
+	group_max += srv_group[i].server_count;
+	if (s < group_max) {
+	  break;
+	}
+      }
+      return i;
+    };
+
+    auto client_info_f = [=](const ClientId& c) -> test::dmc::ClientInfo {
+      return client_info[ret_client_group_f(c)];
+    };
+
+    auto client_disp_filter = [=] (const ClientId& i) -> bool {
+        return i < 3 || i >= (client_total_count - 3);
+    };
+
+    auto server_disp_filter = [=] (const ServerId& i) -> bool {
+        return i < 3 || i >= (server_total_count - 3);
+    };
+
+
+    test::MySim *simulation;
+  
+
+    // lambda to post a request to the identified server; called by client
+    test::SubmitFunc server_post_f =
+        [&simulation](const ServerId& server,
+                      const sim::TestRequest& request,
+                      const ClientId& client_id,
+                      const test::dmc::ReqParams& req_params) {
+        test::DmcServer& s = simulation->get_server(server);
+        s.post(request, client_id, req_params);
+    };
+
+    std::vector<std::vector<sim::CliInst>> cli_inst;
+    for (uint i = 0; i < client_groups; ++i) {
+      if (cli_group[i].client_wait == std::chrono::seconds(0)) {
+	cli_inst.push_back(
+	    { { sim::req_op, 
+	        (uint32_t)cli_group[i].client_total_ops,
+	        (double)cli_group[i].client_iops_goal, 
+	        (uint16_t)cli_group[i].client_outstanding_ops } } );
+      } else {
+	cli_inst.push_back(
+	    { { sim::wait_op, cli_group[i].client_wait },
+	      { sim::req_op, 
+	        (uint32_t)cli_group[i].client_total_ops,
+		(double)cli_group[i].client_iops_goal, 
+		(uint16_t)cli_group[i].client_outstanding_ops } } );
+      }
+    }
+
+    simulation = new test::MySim();
+
+    test::DmcServer::ClientRespFunc client_response_f =
+        [&simulation](ClientId client_id,
+                      const sim::TestResponse& resp,
+                      const ServerId& server_id,
+                      const dmc::PhaseType& phase) {
+        simulation->get_client(client_id).receive_response(resp,
+                                                           server_id,
+                                                           phase);
+    };
+
+    test::CreateQueueF create_queue_f =
+        [&](test::DmcQueue::CanHandleRequestFunc can_f,
+            test::DmcQueue::HandleRequestFunc handle_f) -> test::DmcQueue* {
+        return new test::DmcQueue(client_info_f, can_f, handle_f, server_soft_limit);
+    };
+
+ 
+    auto create_server_f = [&](ServerId id) -> test::DmcServer* {
+      uint i = ret_server_group_f(id);
+      return new test::DmcServer(id,
+                                 srv_group[i].server_iops,
+				 srv_group[i].server_threads,
+				 client_response_f,
+				 test::dmc_server_accumulate_f,
+				 create_queue_f);
+    };
+
+    auto create_client_f = [&](ClientId id) -> test::DmcClient* {
+      uint i = ret_client_group_f(id);
+      test::MySim::ClientBasedServerSelectFunc server_select_f;
+      uint client_server_select_range = cli_group[i].client_server_select_range;
+      if (!server_random_selection) {
+	server_select_f = simulation->make_server_select_alt_range(client_server_select_range);
+      } else {
+	server_select_f = simulation->make_server_select_ran_range(client_server_select_range);
+      }
+      return new test::DmcClient(id,
+				 server_post_f,
+				 std::bind(server_select_f, _1, id),
+				 test::dmc_client_accumulate_f,
+				 cli_inst[i]);
+    };
+
+#if 1
+    std::cout << "[global]" << std::endl << g_conf << std::endl;
+    for (uint i = 0; i < client_groups; ++i) {
+      std::cout << std::endl << "[client." << i << "]" << std::endl;
+      std::cout << cli_group[i] << std::endl;
+    }
+    for (uint i = 0; i < server_groups; ++i) {
+      std::cout << std::endl << "[server." << i << "]" << std::endl;
+      std::cout << srv_group[i] << std::endl;
+    }
+    std::cout << std::endl;
+#endif
+
+    simulation->add_servers(server_total_count, create_server_f);
+    simulation->add_clients(client_total_count, create_client_f);
+
+    simulation->run();
+    simulation->display_stats(std::cout,
+                              &test::server_data, &test::client_data,
+                              server_disp_filter, client_disp_filter);
+} // main
+
+
+void test::client_data(std::ostream& out,
+		 test::MySim* sim,
+		 test::MySim::ClientFilter client_disp_filter,
+		 int head_w, int data_w, int data_prec) {
+    // print, per displayed client, how many ops completed by reservation
+    // and by proportion; each row's total covers all clients, shown or not
+
+    uint64_t total_r = 0; // 64-bit like the accumulator counters it sums
+    out << std::setw(head_w) << "res_ops:";
+    for (uint i = 0; i < sim->get_client_count(); ++i) {
+        const auto& client = sim->get_client(i);
+        auto r = client.get_accumulator().reservation_count;
+        total_r += r;
+        if (!client_disp_filter(i)) continue;
+        out << " " << std::setw(data_w) << r;
+    }
+    out << " " << std::setw(data_w) << std::setprecision(data_prec) <<
+        std::fixed << total_r << std::endl;
+
+    uint64_t total_p = 0; // 64-bit like the accumulator counters it sums
+    out << std::setw(head_w) << "prop_ops:";
+    for (uint i = 0; i < sim->get_client_count(); ++i) {
+        const auto& client = sim->get_client(i);
+        auto p = client.get_accumulator().proportion_count;
+        total_p += p;
+        if (!client_disp_filter(i)) continue;
+        out << " " << std::setw(data_w) << p;
+    }
+    out << " " << std::setw(data_w) << std::setprecision(data_prec) <<
+        std::fixed << total_p << std::endl;
+}
+
+
+// Print per-server reservation/proportion op counts (totals cover all
+// servers, shown or not), the heap branching factor and profile timers.
+void test::server_data(std::ostream& out, test::MySim* sim,
+		 test::MySim::ServerFilter server_disp_filter,
+		 int head_w, int data_w, int data_prec) {
+    out << std::setw(head_w) << "res_ops:";
+    uint64_t total_r = 0; // 64-bit like the accumulator counters it sums
+    for (uint i = 0; i < sim->get_server_count(); ++i) {
+        const auto& server = sim->get_server(i);
+        auto rc = server.get_accumulator().reservation_count;
+        total_r += rc;
+        if (!server_disp_filter(i)) continue;
+        out << " " << std::setw(data_w) << rc;
+    }
+    out << " " << std::setw(data_w) << std::setprecision(data_prec) <<
+        std::fixed << total_r << std::endl;
+
+    out << std::setw(head_w) << "prop_ops:";
+    uint64_t total_p = 0; // 64-bit like the accumulator counters it sums
+    for (uint i = 0; i < sim->get_server_count(); ++i) {
+        const auto& server = sim->get_server(i);
+        auto pc = server.get_accumulator().proportion_count;
+        total_p += pc;
+        if (!server_disp_filter(i)) continue;
+        out << " " << std::setw(data_w) << pc;
+    }
+    out << " " << std::setw(data_w) << std::setprecision(data_prec) <<
+        std::fixed << total_p << std::endl;
+
+    // NOTE(review): presumes at least one server was added -- confirm
+    const auto& q = sim->get_server(0).get_priority_queue();
+    out << std::endl << " k-way heap: " << q.get_heap_branching_factor()
+        << std::endl << std::endl;
+
+#ifdef PROFILE
+    crimson::ProfileCombiner<std::chrono::nanoseconds> art_combiner;
+    crimson::ProfileCombiner<std::chrono::nanoseconds> rct_combiner;
+    for (uint i = 0; i < sim->get_server_count(); ++i) {
+      const auto& q = sim->get_server(i).get_priority_queue();
+      const auto& art = q.add_request_timer;
+      art_combiner.combine(art);
+      const auto& rct = q.request_complete_timer;
+      rct_combiner.combine(rct);
+    }
+    out << "Server add_request_timer: count:" << art_combiner.get_count() <<
+      ", mean:" << art_combiner.get_mean() <<
+      ", std_dev:" << art_combiner.get_std_dev() <<
+      ", low:" << art_combiner.get_low() <<
+      ", high:" << art_combiner.get_high() << std::endl;
+    out << "Server request_complete_timer: count:" << rct_combiner.get_count() <<
+      ", mean:" << rct_combiner.get_mean() <<
+      ", std_dev:" << rct_combiner.get_std_dev() <<
+      ", low:" << rct_combiner.get_low() <<
+      ", high:" << rct_combiner.get_high() << std::endl;
+    out << "Server combined mean: " <<
+      (art_combiner.get_mean() + rct_combiner.get_mean()) <<
+      std::endl;
+#endif
+}
diff --git a/sim/src/test_ssched.cc b/sim/src/test_ssched.cc
new file mode 100644
index 00000000000..e28b015cbdb
--- /dev/null
+++ b/sim/src/test_ssched.cc
@@ -0,0 +1,33 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#include "ssched_recs.h"
+#include "ssched_server.h"
+#include "ssched_client.h"
+
+#include "sim_recs.h"
+#include "sim_server.h"
+#include "sim_client.h"
+
+#include "test_ssched.h"
+
+
+namespace test = crimson::test_simple_scheduler;
+namespace ssched = crimson::simple_scheduler;
+
+
+void test::simple_server_accumulate_f(test::SimpleAccum& a,
+				      const ssched::NullData& add_info) {
+  ++a.request_count;
+}
+
+
+void test::simple_client_accumulate_f(test::SimpleAccum& a,
+				      const ssched::NullData& ignore) {
+  // empty
+}
diff --git a/sim/src/test_ssched.h b/sim/src/test_ssched.h
new file mode 100644
index 00000000000..96ac33ff376
--- /dev/null
+++ b/sim/src/test_ssched.h
@@ -0,0 +1,57 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#include "ssched_server.h"
+#include "ssched_client.h"
+
+#include "sim_recs.h"
+#include "sim_server.h"
+#include "sim_client.h"
+
+#include "simulate.h"
+
+
+namespace crimson {
+  namespace test_simple_scheduler {
+
+    namespace ssched = crimson::simple_scheduler;
+    namespace sim = crimson::qos_simulation;
+
+    using Time = double;
+
+    struct SimpleAccum {
+      uint32_t request_count = 0;
+    };
+
+    using SimpleQueue = ssched::SimpleQueue<ClientId,sim::TestRequest,Time>;
+
+    using SimpleServer = sim::SimulatedServer<SimpleQueue,
+					      ssched::ReqParams,
+					      ssched::NullData,
+					      SimpleAccum>;
+    using SimpleClient = sim::SimulatedClient<ssched::ServiceTracker<ServerId>,
+					      ssched::ReqParams,
+					      ssched::NullData,
+					      SimpleAccum>;
+
+    using CreateQueueF =
+      std::function<SimpleQueue*(SimpleQueue::CanHandleRequestFunc,
+				 SimpleQueue::HandleRequestFunc)>;
+
+
+    using MySim = sim::Simulation<ServerId,ClientId,SimpleServer,SimpleClient>;
+  
+    using SubmitFunc = SimpleClient::SubmitFunc;
+
+    extern void simple_server_accumulate_f(SimpleAccum& a,
+					   const ssched::NullData& add_info);
+
+    extern void simple_client_accumulate_f(SimpleAccum& a,
+					   const ssched::NullData& ignore);
+  } // namespace test_simple
+} // namespace crimson
diff --git a/sim/src/test_ssched_main.cc b/sim/src/test_ssched_main.cc
new file mode 100644
index 00000000000..6df20dc5f89
--- /dev/null
+++ b/sim/src/test_ssched_main.cc
@@ -0,0 +1,187 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#include "test_ssched.h"
+
+
+#ifdef PROFILE
+#include "profile.h"
+#endif
+
+
+namespace test = crimson::test_simple_scheduler;
+namespace ssched = crimson::simple_scheduler;
+namespace sim = crimson::qos_simulation;
+
+using namespace std::placeholders;
+
+
+namespace crimson {
+  namespace test_simple_scheduler {
+    void client_data(std::ostream& out,
+		     test::MySim* sim,
+		     test::MySim::ClientFilter client_disp_filter,
+		     int head_w, int data_w, int data_prec);
+
+    void server_data(std::ostream& out,
+		     test::MySim* sim,
+		     test::MySim::ServerFilter server_disp_filter,
+		     int head_w, int data_w, int data_prec);
+  } // namespace test_simple
+} // namespace crimson
+    
+
+int main(int argc, char* argv[]) {
+  // server params
+
+  const uint server_count = 100;
+  const uint server_iops = 40;
+  const uint server_threads = 1;
+
+  // client params
+
+  const uint client_total_ops = 1000;
+  const uint client_count = 100;
+  const uint client_server_select_range = 10;
+  const uint client_wait_count = 1;
+  const uint client_iops_goal = 50;
+  const uint client_outstanding_ops = 100;
+  const std::chrono::seconds client_wait(10);
+
+  auto client_disp_filter = [=] (const ClientId& i) -> bool {
+    return i < 3 || i >= (client_count - 3);
+  };
+
+  auto server_disp_filter = [=] (const ServerId& i) -> bool {
+    return i < 3 || i >= (server_count - 3);
+  };
+
+
+  test::MySim *simulation;
+
+  // lambda to post a request to the identified server; called by client
+  test::SubmitFunc server_post_f =
+    [&simulation](const ServerId& server_id,
+		  const sim::TestRequest& request,
+		  const ClientId& client_id,
+		  const ssched::ReqParams& req_params) {
+    auto& server = simulation->get_server(server_id);
+    server.post(request, client_id, req_params);
+  };
+
+  static std::vector<sim::CliInst> no_wait =
+    { { sim::req_op, client_total_ops, client_iops_goal, client_outstanding_ops } };
+  static std::vector<sim::CliInst> wait =
+    { { sim::wait_op, client_wait },
+      { sim::req_op, client_total_ops, client_iops_goal, client_outstanding_ops } };
+
+  simulation = new test::MySim();
+
+#if 1
+  test::MySim::ClientBasedServerSelectFunc server_select_f =
+    simulation->make_server_select_alt_range(client_server_select_range);
+#elif 0
+  test::MySim::ClientBasedServerSelectFunc server_select_f =
+    std::bind(&test::MySim::server_select_random, simulation, _1, _2);
+#else
+  test::MySim::ClientBasedServerSelectFunc server_select_f =
+    std::bind(&test::MySim::server_select_0, simulation, _1, _2);
+#endif
+
+  test::SimpleServer::ClientRespFunc client_response_f =
+    [&simulation](ClientId client_id,
+		  const sim::TestResponse& resp,
+		  const ServerId& server_id,
+		  const ssched::NullData& resp_params) {
+    simulation->get_client(client_id).receive_response(resp,
+						       server_id,
+						       resp_params);
+  };
+
+  test::CreateQueueF create_queue_f =
+    [&](test::SimpleQueue::CanHandleRequestFunc can_f,
+	test::SimpleQueue::HandleRequestFunc handle_f) -> test::SimpleQueue* {
+    return new test::SimpleQueue(can_f, handle_f);
+  };
+
+  auto create_server_f = [&](ServerId id) -> test::SimpleServer* {
+    return new test::SimpleServer(id,
+				  server_iops, server_threads,
+				  client_response_f,
+				  test::simple_server_accumulate_f,
+				  create_queue_f);
+  };
+
+  auto create_client_f = [&](ClientId id) -> test::SimpleClient* {
+    return new test::SimpleClient(id,
+				  server_post_f,
+				  std::bind(server_select_f, _1, id),
+				  test::simple_client_accumulate_f,
+				  id < (client_count - client_wait_count)
+				  ? no_wait : wait);
+  };
+
+  simulation->add_servers(server_count, create_server_f);
+  simulation->add_clients(client_count, create_client_f);
+
+  simulation->run();
+  simulation->display_stats(std::cout,
+			    &test::server_data, &test::client_data,
+			    server_disp_filter, client_disp_filter);
+} // main
+
+
+void test::client_data(std::ostream& out,
+		       test::MySim* sim,
+		       test::MySim::ClientFilter client_disp_filter,
+		       int head_w, int data_w, int data_prec) {
+  // empty
+}
+
+
+// Print per-server request counts (total covers all servers) and timers.
+void test::server_data(std::ostream& out, test::MySim* sim,
+		       test::MySim::ServerFilter server_disp_filter,
+		       int head_w, int data_w, int data_prec) {
+  out << std::setw(head_w) << "requests:";
+  uint64_t total_req = 0; // unsigned and wide: sums unsigned request counts
+  for (uint i = 0; i < sim->get_server_count(); ++i) {
+    const auto& server = sim->get_server(i);
+    auto req_count = server.get_accumulator().request_count;
+    total_req += req_count;
+    if (!server_disp_filter(i)) continue;
+    out << std::setw(data_w) << req_count;
+  }
+  out << std::setw(data_w) << std::setprecision(data_prec) <<
+    std::fixed << total_req << std::endl;
+
+#ifdef PROFILE
+    crimson::ProfileCombiner<std::chrono::nanoseconds> art_combiner;
+    crimson::ProfileCombiner<std::chrono::nanoseconds> rct_combiner;
+    for (uint i = 0; i < sim->get_server_count(); ++i) {
+      const auto& q = sim->get_server(i).get_priority_queue();
+      const auto& art = q.add_request_timer;
+      art_combiner.combine(art);
+      const auto& rct = q.request_complete_timer;
+      rct_combiner.combine(rct);
+    }
+    out << "Server add_request_timer: count:" << art_combiner.get_count() <<
+      ", mean:" << art_combiner.get_mean() <<
+      ", std_dev:" << art_combiner.get_std_dev() <<
+      ", low:" << art_combiner.get_low() <<
+      ", high:" << art_combiner.get_high() << std::endl;
+    out << "Server request_complete_timer: count:" << rct_combiner.get_count() <<
+      ", mean:" << rct_combiner.get_mean() <<
+      ", std_dev:" << rct_combiner.get_std_dev() <<
+      ", low:" << rct_combiner.get_low() <<
+      ", high:" << rct_combiner.get_high() << std::endl;
+    out << "Server combined mean: " <<
+      (art_combiner.get_mean() + rct_combiner.get_mean()) <<
+      std::endl;
+#endif
+}
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 00000000000..691e64cce43
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,19 @@
+include_directories(../support/src)
+include_directories(${BOOST_INCLUDE_DIR})
+
+set(local_flags "-Wall -pthread")
+
+set(dmc_srcs dmclock_util.cc ../support/src/run_every.cc)
+
+set_source_files_properties(${dmc_srcs}
+  PROPERTIES
+  COMPILE_FLAGS "${local_flags}"
+  )
+
+if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
+  set(warnings_off " -Wno-unused-variable -Wno-unused-function")
+elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
+  set(warnings_off " -Wno-unused-but-set-variable -Wno-unused-function")
+endif()
+
+add_library(dmclock STATIC ${dmc_srcs})
diff --git a/src/dmclock_client.h b/src/dmclock_client.h
new file mode 100644
index 00000000000..b44e1211b53
--- /dev/null
+++ b/src/dmclock_client.h
@@ -0,0 +1,194 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2017 Red Hat Inc.
+ */
+
+
+#pragma once
+
+#include <map>
+#include <deque>
+#include <chrono>
+#include <thread>
+#include <mutex>
+#include <condition_variable>
+
+#include "run_every.h"
+#include "dmclock_util.h"
+#include "dmclock_recs.h"
+
+#include "gtest/gtest_prod.h"
+
+
+namespace crimson {
+  namespace dmclock {
+    struct ServerInfo {
+      Counter   delta_prev_req; // delta counter value given at creation/last request
+      Counter   rho_prev_req;   // rho counter value given at creation/last request
+      uint32_t  my_delta;       // responses tallied since the last req_update()
+      uint32_t  my_rho;         // ... of which were reservation-phase responses
+
+      ServerInfo(Counter _delta_prev_req,
+		 Counter _rho_prev_req) :
+	delta_prev_req(_delta_prev_req),
+	rho_prev_req(_rho_prev_req),
+	my_delta(0),
+	my_rho(0)
+      {
+	// empty
+      }
+
+      inline void req_update(Counter delta, Counter rho) { // store counters, zero tallies
+	delta_prev_req = delta;
+	rho_prev_req = rho;
+	my_delta = 0;
+	my_rho = 0;
+      }
+
+      inline void resp_update(PhaseType phase) { // tally one response
+	++my_delta;
+	if (phase == PhaseType::reservation) ++my_rho;
+      }
+    };
+
+
+    // S is server identifier type
+    template<typename S>
+    class ServiceTracker {
+      FRIEND_TEST(dmclock_client, server_erase);
+
+      using TimePoint = decltype(std::chrono::steady_clock::now());
+      using Duration = std::chrono::milliseconds;
+      using MarkPoint = std::pair<TimePoint,Counter>;
+
+      Counter                 delta_counter; // # reqs completed
+      Counter                 rho_counter;   // # reqs completed via reservation
+      std::map<S,ServerInfo>  server_map;
+      mutable std::mutex      data_mtx;      // protects Counters and map
+
+      using DataGuard = std::lock_guard<decltype(data_mtx)>;
+
+      // clean config
+
+      std::deque<MarkPoint>     clean_mark_points;
+      Duration                  clean_age;     // age at which ServerInfo cleaned
+
+      // NB: All threads declared at end, so they're destructed first!
+
+      std::unique_ptr<RunEvery> cleaning_job;
+
+
+    public:
+
+      // we have to start the counters at 1, as 0 is used in the
+      // cleaning process
+      template<typename Rep, typename Per>
+      ServiceTracker(std::chrono::duration<Rep,Per> _clean_every,
+		     std::chrono::duration<Rep,Per> _clean_age) :
+	delta_counter(1),
+	rho_counter(1),
+	clean_age(std::chrono::duration_cast<Duration>(_clean_age))
+      {
+	cleaning_job =
+	  std::unique_ptr<RunEvery>(
+	    new RunEvery(_clean_every,
+			 std::bind(&ServiceTracker::do_clean, this)));
+      }
+
+
+      // the reason we're overloading the constructor rather than
+      // using default values for the arguments is so that callers
+      // have to either use all defaults or specify all timings; with
+      // default arguments they could specify some without others
+      ServiceTracker() :
+	ServiceTracker(std::chrono::minutes(5), std::chrono::minutes(10))
+      {
+	// empty
+      }
+
+
+      /*
+       * Incorporates the RespParams received into the various counters.
+       */
+      void track_resp(const S& server_id, const PhaseType& phase) {
+	DataGuard g(data_mtx);
+
+	auto it = server_map.find(server_id);
+	if (server_map.end() == it) {
+	  // this code can only run if a request did not precede the
+	  // response or if the record was cleaned up b/w when
+	  // the request was made and now
+	  ServerInfo si(delta_counter, rho_counter);
+	  si.resp_update(phase);
+	  server_map.emplace(server_id, si);
+	} else {
+	  it->second.resp_update(phase);
+	}
+
+	++delta_counter;
+	if (PhaseType::reservation == phase) {
+	  ++rho_counter;
+	}
+      }
+
+
+      /*
+       * Returns the ReqParams for the given server.
+       */
+      ReqParams get_req_params(const S& server) {
+	DataGuard g(data_mtx);
+	auto it = server_map.find(server);
+	if (server_map.end() == it) {
+	  server_map.emplace(server, ServerInfo(delta_counter, rho_counter));
+	  return ReqParams(1, 1);
+	} else {
+	  Counter delta =
+	    1 + delta_counter - it->second.delta_prev_req - it->second.my_delta;
+	  Counter rho =
+	    1 + rho_counter - it->second.rho_prev_req - it->second.my_rho;
+
+	  it->second.req_update(delta_counter, rho_counter);
+
+	  return ReqParams(uint32_t(delta), uint32_t(rho));
+	}
+      }
+
+    private:
+
+      /*
+       * Invoked periodically by the RunEvery job set up in the
+       * constructor. Each call records the current time and delta
+       * counter (a mark point) in a deque, then pops every mark point
+       * that is at least clean_age old, remembering the newest one
+       * popped. Finally it walks the map and deletes all server entries
+       * whose last request was made at or before that mark point.
+       */
+      void do_clean() {
+	TimePoint now = std::chrono::steady_clock::now();
+	DataGuard g(data_mtx);
+	clean_mark_points.emplace_back(MarkPoint(now, delta_counter));
+
+	Counter earliest = 0;
+	// test empty() first: a zero clean_age pops even the point just added
+	while (!clean_mark_points.empty() &&
+	       clean_mark_points.front().first <= now - clean_age) {
+	  earliest = clean_mark_points.front().second;
+	  clean_mark_points.pop_front();
+	}
+
+	if (earliest > 0) { // counters start at 1, so 0 means nothing expired
+	  for (auto i = server_map.begin();
+	       i != server_map.end();
+	       /* empty */) {
+	    auto i2 = i++;
+	    if (i2->second.delta_prev_req <= earliest) {
+	      server_map.erase(i2);
+	    }
+	  }
+	}
+      } // do_clean
+    }; // class ServiceTracker
+  }
+}
diff --git a/src/dmclock_recs.h b/src/dmclock_recs.h
new file mode 100644
index 00000000000..f7a5aaadb10
--- /dev/null
+++ b/src/dmclock_recs.h
@@ -0,0 +1,61 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2017 Red Hat Inc.
+ */
+
+
+#pragma once
+
+
+#include <ostream>
+#include <assert.h>
+
+
+namespace crimson {
+  namespace dmclock {
+    using Counter = uint64_t;
+
+    enum class PhaseType { reservation, priority };
+    // prints the phase by name; every non-reservation phase is "priority"
+    inline std::ostream& operator<<(std::ostream& out, const PhaseType& phase) {
+      const bool resv = (phase == PhaseType::reservation);
+      return out << (resv ? "reservation" : "priority");
+    }
+
+    struct ReqParams {
+      // count of all replies since last request; MUSTN'T BE 0
+      uint32_t delta;
+
+      // count of reservation replies since last request; MUSTN'T BE 0
+      uint32_t rho;
+
+      // both counts must be non-zero and rho may not exceed delta; the
+      // assert enforces this only in builds where assert is active
+      ReqParams(uint32_t _delta, uint32_t _rho) :
+	delta(_delta),
+	rho(_rho)
+      {
+	assert(0 != delta && 0 != rho && rho <= delta);
+      }
+
+      // the default is the smallest legal value for both counts
+      ReqParams() :
+	ReqParams(1, 1)
+      {
+	// empty
+      }
+
+      // No copy constructor is declared on purpose: the implicit
+      // memberwise copy and copy assignment are exactly what is wanted
+      // and they keep this type trivially copyable.
+
+      friend std::ostream& operator<<(std::ostream& out, const ReqParams& rp) {
+	out << "ReqParams{ delta:" << rp.delta <<
+	  ", rho:" << rp.rho << " }";
+	return out;
+      }
+    }; // struct ReqParams
+  }
+}
diff --git a/src/dmclock_server.h b/src/dmclock_server.h
new file mode 100644
index 00000000000..65013063fa7
--- /dev/null
+++ b/src/dmclock_server.h
@@ -0,0 +1,1588 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2017 Red Hat Inc.
+ */
+
+
+#pragma once
+
+/* COMPILATION OPTIONS
+ *
+ * By default we include an optimization over the originally published
+ * dmclock algorithm using not the values of rho and delta that were
+ * sent in with a request but instead the most recent rho and delta
+ * values from the request's client. To restore the algorithm's
+ * original behavior, define DO_NOT_DELAY_TAG_CALC (i.e., compiler
+ * argument -DDO_NOT_DELAY_TAG_CALC).
+ *
+ * The prop_heap does not seem to be necessary. The only thing it
+ * would help with is quickly finding the minimum proportion/priority
+ * when an idle client became active. To have the code maintain the
+ * proportional heap, define USE_PROP_HEAP (i.e., compiler argument
+ * -DUSE_PROP_HEAP).
+ */
+
+#include <assert.h>
+
+#include <cmath>
+#include <memory>
+#include <map>
+#include <deque>
+#include <queue>
+#include <atomic>
+#include <mutex>
+#include <condition_variable>
+#include <thread>
+#include <iostream>
+#include <sstream>
+#include <limits>
+
+#include <boost/variant.hpp>
+
+#include "indirect_intrusive_heap.h"
+#include "run_every.h"
+#include "dmclock_util.h"
+#include "dmclock_recs.h"
+
+#ifdef PROFILE
+#include "profile.h"
+#endif
+
+#include "gtest/gtest_prod.h"
+
+
+namespace crimson {
+
+  namespace dmclock {
+
+    namespace c = crimson;
+    // sentinel tags: +/-infinity when double is IEC 559, else max()/lowest()
+    constexpr double max_tag = std::numeric_limits<double>::is_iec559 ?
+      std::numeric_limits<double>::infinity() :
+      std::numeric_limits<double>::max();
+    constexpr double min_tag = std::numeric_limits<double>::is_iec559 ?
+      -std::numeric_limits<double>::infinity() :
+      std::numeric_limits<double>::lowest();
+    constexpr uint tag_modulo = 1000000; // passed to format_time by format_tag
+
+    struct ClientInfo {
+      const double reservation;  // minimum
+      const double weight;       // proportional
+      const double limit;        // maximum
+
+      // reciprocals of the three values above, cached here so that
+      // calculations need not recompute them; a value of 0 is given 0.0
+      const double reservation_inv;
+      const double weight_inv;
+      const double limit_inv;
+
+      // argument order is min, "normal", max
+      ClientInfo(double _reservation, double _weight, double _limit) :
+	reservation(_reservation),
+	weight(_weight),
+	limit(_limit),
+	reservation_inv(_reservation != 0.0 ? 1.0 / _reservation : 0.0),
+	weight_inv(_weight != 0.0 ? 1.0 / _weight : 0.0),
+	limit_inv(_limit != 0.0 ? 1.0 / _limit : 0.0)
+      {
+	// all work is done by the initializers
+      }
+
+      // r is written in the stream's current float format; std::fixed is
+      // sticky, so it applies to every later field and remains set on out
+      friend std::ostream& operator<<(std::ostream& out,
+				      const ClientInfo& info) {
+	out << "{ ClientInfo:: r:" << info.reservation << std::fixed <<
+	  " w:" << info.weight <<
+	  " l:" << info.limit <<
+	  " 1/r:" << info.reservation_inv <<
+	  " 1/w:" << info.weight_inv <<
+	  " 1/l:" << info.limit_inv <<
+	  " }";
+	return out;
+      }
+    }; // struct ClientInfo
+
+
+    struct RequestTag {
+      double reservation;
+      double proportion;
+      double limit;
+      bool   ready; // true when within limit
+#ifndef DO_NOT_DELAY_TAG_CALC
+      Time   arrival;
+#endif
+
+      // Computes the tag that follows prev_tag for a request of client
+      // seen at time. The reservation increment is scaled by rho, the
+      // proportion and limit increments by delta; cost affects reservation.
+      RequestTag(const RequestTag& prev_tag,
+		 const ClientInfo& client,
+		 const ReqParams& req_params,
+		 const Time& time,
+		 const double cost = 0.0) :
+	reservation(cost + tag_calc(time,
+				    prev_tag.reservation,
+				    client.reservation_inv,
+				    req_params.rho,
+				    true)),
+	proportion(tag_calc(time,
+			    prev_tag.proportion,
+			    client.weight_inv,
+			    req_params.delta,
+			    true)),
+	limit(tag_calc(time,
+		       prev_tag.limit,
+		       client.limit_inv,
+		       req_params.delta,
+		       false)),
+	ready(false)
+#ifndef DO_NOT_DELAY_TAG_CALC
+	, arrival(time)
+#endif
+      {
+	assert(reservation < max_tag || proportion < max_tag);
+      }
+
+      // Builds a tag from explicit values; _arrival is stored only when
+      // DO_NOT_DELAY_TAG_CALC is not defined.
+      RequestTag(double _res, double _prop, double _lim, const Time& _arrival) :
+	reservation(_res),
+	proportion(_prop),
+	limit(_lim),
+	ready(false)
+#ifndef DO_NOT_DELAY_TAG_CALC
+	, arrival(_arrival)
+#endif
+      {
+	assert(reservation < max_tag || proportion < max_tag);
+      }
+
+      // No copy constructor is declared: the implicit memberwise copy does
+      // the same job and keeps copy assignment of tags non-deprecated.
+
+      static std::string format_tag_change(double before, double after) {
+	if (before == after) {
+	  return std::string("same");
+	} else {
+	  std::stringstream ss;
+	  ss << format_tag(before) << "=>" << format_tag(after);
+	  return ss.str();
+	}
+      }
+
+      // "max"/"min" for the sentinel tags, else format_time's rendering
+      static std::string format_tag(double value) {
+	if (max_tag == value) {
+	  return std::string("max");
+	} else if (min_tag == value) {
+	  return std::string("min");
+	} else {
+	  return format_time(value, tag_modulo);
+	}
+      }
+
+    private:
+
+      // Next value for one tag. A zero increment pins the tag to max_tag or
+      // min_tag as chosen by extreme_is_high; otherwise the result is the
+      // larger of time and prev + increment * dist_req_val (0 counts as 1).
+      static double tag_calc(const Time& time,
+			     double prev,
+			     double increment,
+			     uint32_t dist_req_val,
+			     bool extreme_is_high) {
+	if (0.0 == increment) {
+	  return extreme_is_high ? max_tag : min_tag;
+	} else {
+	  if (0 != dist_req_val) {
+	    increment *= dist_req_val;
+	  }
+	  return std::max(time, prev + increment);
+	}
+      }
+
+      friend std::ostream& operator<<(std::ostream& out,
+				      const RequestTag& tag) {
+	out <<
+	  "{ RequestTag:: ready:" << (tag.ready ? "true" : "false") <<
+	  " r:" << format_tag(tag.reservation) <<
+	  " p:" << format_tag(tag.proportion) <<
+	  " l:" << format_tag(tag.limit) <<
+#if 0 // try to resolve this to make sure Time is operator<<'able.
+#ifndef DO_NOT_DELAY_TAG_CALC
+	  " arrival:" << tag.arrival <<
+#endif
+#endif
+	  " }";
+	return out;
+      }
+    }; // struct RequestTag
+
+
+    // C is client identifier type, R is request type, B is heap
+    // branching factor
+    template<typename C, typename R, uint B>
+    class PriorityQueueBase {
+      FRIEND_TEST(dmclock_server, client_idle_erase);
+
+    public:
+
+      using RequestRef = std::unique_ptr<R>;
+
+    protected:
+
+      using TimePoint = decltype(std::chrono::steady_clock::now());
+      using Duration = std::chrono::milliseconds;
+      using MarkPoint = std::pair<TimePoint,Counter>;
+
+      enum class ReadyOption {ignore, lowers, raises};
+
+      // forward decl for friend decls
+      template<double RequestTag::*, ReadyOption, bool>
+      struct ClientCompare;
+
+      class ClientReq {
+	friend PriorityQueueBase;
+
+	RequestTag tag;
+	C          client_id;
+	RequestRef request;
+
+      public:
+
+	// takes ownership of the request; tag and client id are copied
+	ClientReq(const RequestTag& _tag,
+		  const C&          _client_id,
+		  RequestRef&&      _request) :
+	  tag(_tag),
+	  client_id(_client_id),
+	  request(std::move(_request))
+	{}
+
+	// prints tag and client id only; the request itself is not shown
+	friend std::ostream& operator<<(std::ostream& out, const ClientReq& r) {
+	  out << "{ ClientReq:: tag:" << r.tag;
+	  out << " client:" << r.client_id << " }";
+	  return out;
+	}
+      }; // class ClientReq
+
+    public:
+
+      // NOTE: ClientRec is in the "public" section for compatibility
+      // with g++ 4.8.4, which complains if it's not. By g++ 6.3.1
+      // ClientRec could be "protected" with no issue. [See comments
+      // associated with function submit_top_request.]
+      class ClientRec {
+	friend PriorityQueueBase<C,R,B>;
+
+	C                     client;
+	RequestTag            prev_tag;
+	std::deque<ClientReq> requests;
+
+	// offset added to the proportion tag in comparisons; set when
+	// an idle client becomes unidle
+	double                prop_delta = 0.0;
+
+	c::IndIntruHeapData   reserv_heap_data;
+	c::IndIntruHeapData   lim_heap_data;
+	c::IndIntruHeapData   ready_heap_data;
+#if USE_PROP_HEAP
+	c::IndIntruHeapData   prop_heap_data;
+#endif
+
+      public:
+
+	ClientInfo            info;
+	bool                  idle;
+	Counter               last_tick;
+	uint32_t              cur_rho;
+	uint32_t              cur_delta;
+
+	ClientRec(C _client,
+		  const ClientInfo& _info,
+		  Counter current_tick) :
+	  client(_client),
+	  prev_tag(0.0, 0.0, 0.0, TimeZero),
+	  info(_info),
+	  idle(true),
+	  last_tick(current_tick),
+	  cur_rho(1),
+	  cur_delta(1)
+	{
+	  // a new record starts idle, with zeroed tags and no requests
+	}
+
+	inline const RequestTag& get_req_tag() const {
+	  return prev_tag;
+	}
+	// assigns rhs to lhs unless rhs is the max_tag or min_tag sentinel
+	static inline void assign_unpinned_tag(double& lhs, const double rhs) {
+	  if (rhs != max_tag && rhs != min_tag) {
+	    lhs = rhs;
+	  }
+	}
+	// records _prev (sentinel fields excepted) as prev_tag, and the tick
+	inline void update_req_tag(const RequestTag& _prev,
+				   const Counter& _tick) {
+	  assign_unpinned_tag(prev_tag.reservation, _prev.reservation);
+	  assign_unpinned_tag(prev_tag.limit, _prev.limit);
+	  assign_unpinned_tag(prev_tag.proportion, _prev.proportion);
+	  last_tick = _tick;
+	}
+	// queues a request, constructing its ClientReq in place
+	inline void add_request(const RequestTag& tag,
+				const C&          client_id,
+				RequestRef&&      request) {
+	  requests.emplace_back(tag, client_id, std::move(request));
+	}
+
+	inline const ClientReq& next_request() const {
+	  return requests.front();
+	}
+
+	inline ClientReq& next_request() {
+	  return requests.front();
+	}
+
+	inline void pop_request() {
+	  requests.pop_front();
+	}
+
+	inline bool has_request() const {
+	  return !requests.empty();
+	}
+
+	inline size_t request_count() const {
+	  return requests.size();
+	}
+
+	// visits requests front to back, erasing those filter_accum accepts;
+	// NB: erasing from a deque might be expensive
+	bool remove_by_req_filter_fw(std::function<bool(const R&)> filter_accum) {
+	  bool any_removed = false;
+	  for (auto i = requests.begin();
+	       i != requests.end();
+	       /* no inc */) {
+	    if (filter_accum(*i->request)) {
+	      any_removed = true;
+	      i = requests.erase(i);
+	    } else {
+	      ++i;
+	    }
+	  }
+	  return any_removed;
+	}
+
+	// visits requests back to front, erasing those filter_accum accepts;
+	// NB: erasing from a deque might be expensive
+	bool remove_by_req_filter_bw(std::function<bool(const R&)> filter_accum) {
+	  bool any_removed = false;
+	  for (auto i = requests.rbegin();
+	       i != requests.rend();
+	       /* no inc */) {
+	    if (filter_accum(*i->request)) {
+	      any_removed = true;
+	      i = decltype(i){ requests.erase(std::next(i).base()) };
+	    } else {
+	      ++i;
+	    }
+	  }
+	  return any_removed;
+	}
+
+	inline bool
+	remove_by_req_filter(std::function<bool(const R&)> filter_accum,
+			     bool visit_backwards) {
+	  if (visit_backwards) {
+	    return remove_by_req_filter_bw(filter_accum);
+	  } else {
+	    return remove_by_req_filter_fw(filter_accum);
+	  }
+	}
+
+	friend std::ostream&
+	operator<<(std::ostream& out,
+		   const typename PriorityQueueBase<C,R,B>::ClientRec& e) {
+	  out << "{ ClientRec::" <<
+	    " client:" << e.client <<
+	    " prev_tag:" << e.prev_tag <<
+	    " req_count:" << e.requests.size() <<
+	    " top_req:";
+	  if (e.has_request()) {
+	    out << e.next_request();
+	  } else {
+	    out << "none";
+	  }
+	  out << " }";
+
+	  return out;
+	}
+      }; // class ClientRec
+
+      using ClientRecRef = std::shared_ptr<ClientRec>;
+
+      // when we try to get the next request, we'll be in one of three
+      // situations -- we'll have one to return, have one that can
+      // fire in the future, or not have any
+      enum class NextReqType { returning, future, none };
+
+      // specifies which queue next request will get popped from
+      enum class HeapId { reservation, ready };
+
+      // this is returned from do_next_request to tell the caller the situation
+      struct NextReq {
+	NextReqType type;
+	union {
+	  HeapId    heap_id;    // set when type is NextReqType::returning
+	  Time      when_ready; // set when type is NextReqType::future
+	};
+      };
+
+
+      // a function that can be called to look up client information
+      using ClientInfoFunc = std::function<ClientInfo(const C&)>;
+
+
+      bool empty() const {
+	DataGuard guard(data_mtx);
+	return !(!resv_heap.empty() && resv_heap.top().has_request());
+      }
+
+
+      size_t client_count() const {
+	DataGuard guard(data_mtx);  // heap is read under data_mtx
+	return resv_heap.size();    // resv_heap has one entry per client
+      }
+
+
+      size_t request_count() const {
+	DataGuard guard(data_mtx);
+	size_t queued = 0;
+	const auto last = resv_heap.cend();
+	for (auto rec = resv_heap.cbegin(); rec != last; ++rec)
+	  queued += rec->request_count();
+	return queued;
+      }
+
+
+      bool remove_by_req_filter(std::function<bool(const R&)> filter_accum,
+				bool visit_backwards = false) {
+	// returns true if any request matching filter_accum was removed
+	bool any_removed = false;
+	DataGuard g(data_mtx);
+	// by reference: a by-value entry copies the id and the shared_ptr
+	for (const auto& entry : client_map) {
+	  if (entry.second->remove_by_req_filter(filter_accum, visit_backwards)) {
+	    resv_heap.adjust(*entry.second);
+	    limit_heap.adjust(*entry.second);
+	    ready_heap.adjust(*entry.second);
+#if USE_PROP_HEAP
+	    prop_heap.adjust(*entry.second);
+#endif
+	    any_removed = true;
+	  }
+	}
+	return any_removed;
+      }
+
+
+      // default accumulator, used when no accumulator is provided
+      static void request_sink(const R& /* req */) {
+	// intentionally does nothing
+      }
+
+
+      void remove_by_client(const C& client,
+			    bool reverse = false,
+			    std::function<void (const R&)> accum = request_sink) {
+	DataGuard g(data_mtx);
+
+	const auto found = client_map.find(client);
+	if (client_map.end() == found) {
+	  return; // unknown client; nothing to remove
+	}
+	ClientRec& rec = *found->second;
+
+	// pass each queued request to accum, back to front when reverse
+	if (reverse) {
+	  for (auto j = rec.requests.rbegin(); j != rec.requests.rend(); ++j) {
+	    accum(*j->request);
+	  }
+	} else {
+	  for (const auto& queued : rec.requests) {
+	    accum(*queued.request);
+	  }
+	}
+
+	rec.requests.clear();
+
+	// with its queue emptied the client may belong elsewhere in each heap
+	resv_heap.adjust(rec);
+	limit_heap.adjust(rec);
+	ready_heap.adjust(rec);
+#if USE_PROP_HEAP
+	prop_heap.adjust(rec);
+#endif
+      }
+
+
+      uint get_heap_branching_factor() const {
+	return B; // B is the heap branching factor template argument
+      }
+
+
+      friend std::ostream& operator<<(std::ostream& out,
+				      const PriorityQueueBase& q) {
+	// hold the queue's data mutex while its records and heaps are read
+	std::lock_guard<decltype(q.data_mtx)> lock(q.data_mtx);
+
+	out << "{ PriorityQueue::";
+	for (const auto& entry : q.client_map) {
+	  out << "  { client:" << entry.first;
+	  out << ", record:" << *entry.second << " }";
+	}
+
+	// only resv_heap is tested for emptiness before the top of each of
+	// the three heaps is printed
+	if (q.resv_heap.empty()) {
+	  out << " HEAPS-EMPTY";
+	} else {
+	  out << " { reservation_top:" << q.resv_heap.top() << " }";
+	  out << " { ready_top:" << q.ready_heap.top() << " }";
+	  out << " { limit_top:" << q.limit_heap.top() << " }";
+	}
+	out << " }";
+	return out;
+      }
+
+      // debugging aid: passes each selected heap to display_sorted on out
+      void display_queues(std::ostream& out,
+			  bool show_res = true,
+			  bool show_lim = true,
+			  bool show_ready = true,
+			  bool show_prop = true) const {
+	auto show_all = [](const ClientRec&) -> bool { return true; };
+	DataGuard guard(data_mtx);
+	if (show_res) {
+	  resv_heap.display_sorted(out << "RESER:", show_all);
+	}
+	if (show_lim) {
+	  limit_heap.display_sorted(out << "LIMIT:", show_all);
+	}
+	if (show_ready) {
+	  ready_heap.display_sorted(out << "READY:", show_all);
+	}
+#if USE_PROP_HEAP
+	if (show_prop) {
+	  prop_heap.display_sorted(out << "PROPO:", show_all);
+	}
+#endif
+      } // display_queues
+
+
+    protected:
+
+      // The ClientCompare functor is essentially doing a precedes?
+      // operator, returning true if and only if the first parameter
+      // must precede the second parameter. If the second must precede
+      // the first, or if they are equivalent, false should be
+      // returned. The reason for this behavior is that it will be
+      // called to test if two items are out of order and if true is
+      // returned it will reverse the items. Therefore false is the
+      // default return when it doesn't matter to prevent unnecessary
+      // re-ordering.
+      //
+      // The template is supporting variations in sorting based on the
+      // heap in question and allowing these variations to be handled
+      // at compile-time.
+      //
+      // tag_field determines which tag is being used for comparison
+      //
+      // ready_opt determines how the ready flag influences the sort
+      //
+      // use_prop_delta determines whether the proportional delta is
+      // added in for comparison
+      template<double RequestTag::*tag_field,
+	       ReadyOption ready_opt,
+	       bool use_prop_delta>
+      struct ClientCompare {
+	bool operator()(const ClientRec& n1, const ClientRec& n2) const {
+	  // a client with no request never precedes another; returning
+	  // false here also covers the case where neither has one, which
+	  // leaves their relative order alone
+	  if (!n1.has_request()) {
+	    return false;
+	  }
+
+	  // n1 has a request, so it precedes a client that has none
+	  if (!n2.has_request()) {
+	    return true;
+	  }
+
+	  const RequestTag& t1 = n1.next_request().tag;
+	  const RequestTag& t2 = n2.next_request().tag;
+
+	  // when this heap looks at ready and the flags differ, they decide:
+	  // raises puts the ready request first, lowers the unready one
+	  if (ReadyOption::ignore != ready_opt && t1.ready != t2.ready) {
+	    return ReadyOption::raises == ready_opt ? t1.ready : t2.ready;
+	  }
+
+	  // otherwise compare the selected tag, offset by each client's
+	  // prop_delta when use_prop_delta is set
+	  if (use_prop_delta) {
+	    return (t1.*tag_field + n1.prop_delta) <
+	      (t2.*tag_field + n2.prop_delta);
+	  }
+
+	  return t1.*tag_field < t2.*tag_field;
+	}
+      };
+
+      ClientInfoFunc       client_info_f;
+
+      mutable std::mutex data_mtx;
+      using DataGuard = std::lock_guard<decltype(data_mtx)>;
+
+      // stable mapping between client ids and client queues
+      std::map<C,ClientRecRef> client_map;
+
+      c::IndIntruHeap<ClientRecRef,
+		      ClientRec,
+		      &ClientRec::reserv_heap_data,
+		      ClientCompare<&RequestTag::reservation,
+				    ReadyOption::ignore,
+				    false>,
+		      B> resv_heap;
+#if USE_PROP_HEAP
+      c::IndIntruHeap<ClientRecRef,
+		      ClientRec,
+		      &ClientRec::prop_heap_data,
+		      ClientCompare<&RequestTag::proportion,
+				    ReadyOption::ignore,
+				    true>,
+		      B> prop_heap;
+#endif
+      c::IndIntruHeap<ClientRecRef,
+		      ClientRec,
+		      &ClientRec::lim_heap_data,
+		      ClientCompare<&RequestTag::limit,
+				    ReadyOption::lowers,
+				    false>,
+		      B> limit_heap;
+      c::IndIntruHeap<ClientRecRef,
+		      ClientRec,
+		      &ClientRec::ready_heap_data,
+		      ClientCompare<&RequestTag::proportion,
+				    ReadyOption::raises,
+				    true>,
+		      B> ready_heap;
+
+      // if all reservations are met and all other requests are under
+      // limit, this will allow the request next in terms of
+      // proportion to still get issued
+      bool             allow_limit_break;
+
+      std::atomic_bool finishing;
+
+      // every request creates a tick
+      Counter tick = 0;
+
+      // performance data collection
+      size_t reserv_sched_count = 0;
+      size_t prop_sched_count = 0;
+      size_t limit_break_sched_count = 0;
+
+      Duration                  idle_age;
+      Duration                  erase_age;
+      Duration                  check_time;
+      std::deque<MarkPoint>     clean_mark_points;
+
+      // NB: All threads declared at end, so they're destructed first!
+
+      std::unique_ptr<RunEvery> cleaning_job;
+
+
+      // Common constructor that the others feed into; the three durations
+      // share one std::chrono::duration type and are converted to Duration
+      template<typename Rep, typename Per>
+      PriorityQueueBase(ClientInfoFunc _client_info_f,
+			std::chrono::duration<Rep,Per> _idle_age,
+			std::chrono::duration<Rep,Per> _erase_age,
+			std::chrono::duration<Rep,Per> _check_time,
+			bool _allow_limit_break) :
+	client_info_f(_client_info_f),
+	allow_limit_break(_allow_limit_break),
+	finishing(false),
+	idle_age(std::chrono::duration_cast<Duration>(_idle_age)),
+	erase_age(std::chrono::duration_cast<Duration>(_erase_age)),
+	check_time(std::chrono::duration_cast<Duration>(_check_time))
+      {
+	// erase age may not be below idle age; checks must outpace idling
+	assert(_idle_age <= _erase_age);
+	assert(_idle_age > _check_time);
+	cleaning_job.reset(
+	  new RunEvery(check_time,
+		       std::bind(&PriorityQueueBase::do_clean, this)));
+      }
+
+
+      ~PriorityQueueBase() {
+	finishing = true; // set the atomic flag before members are destroyed
+      }
+
+
+      // data_mtx must be held by caller; queues request for client_id
+      void do_add_request(RequestRef&&     request,
+			  const C&         client_id,
+			  const ReqParams& req_params,
+			  const Time       time,
+			  const double     cost = 0.0) {
+	++tick;
+
+	// this pointer will help us create a reference to a shared
+	// pointer, no matter which of two codepaths we take
+	ClientRec* temp_client;
+
+	auto client_it = client_map.find(client_id);
+	if (client_map.end() != client_it) {
+	  temp_client = &(*client_it->second); // address of obj of shared_ptr
+	} else {
+	  ClientInfo info = client_info_f(client_id);
+	  ClientRecRef client_rec =
+	    std::make_shared<ClientRec>(client_id, info, tick);
+	  resv_heap.push(client_rec);
+#if USE_PROP_HEAP
+	  prop_heap.push(client_rec);
+#endif
+	  limit_heap.push(client_rec);
+	  ready_heap.push(client_rec);
+	  client_map[client_id] = client_rec;
+	  temp_client = &(*client_rec); // address of obj of shared_ptr
+	}
+
+	// for convenience, we'll create a reference to the shared pointer
+	ClientRec& client = *temp_client;
+
+	if (client.idle) {
+	  // We need to do an adjustment so that idle clients compete
+	  // fairly on proportional tags since those tags may have
+	  // drifted from real-time. Either use the lowest existing
+	  // proportion tag -- O(1) -- or the client with the lowest
+	  // previous proportion tag -- O(n) where n = # clients.
+	  //
+	  // So we don't have to maintain a proportional queue that
+	  // keeps the minimum on proportional tag alone (we're
+	  // instead using a ready queue), we'll have to check each
+	  // client.
+	  //
+	  // The alternative would be to maintain a proportional queue
+	  // (define USE_PROP_HEAP) and do an O(1) operation here.
+
+	  // Was unable to confirm whether equality testing on
+	  // std::numeric_limits<double>::max() is guaranteed, so
+	  // we'll use a compile-time calculated trigger that is one
+	  // third the max, which should be much larger than any
+	  // expected organic value.
+	  constexpr double lowest_prop_tag_trigger =
+	    std::numeric_limits<double>::max() / 3.0;
+
+	  double lowest_prop_tag = std::numeric_limits<double>::max();
+	  for (auto const &c : client_map) {
+	    // don't use ourselves (or anything else that might be
+	    // listed as idle) since we're now in the map
+	    if (!c.second->idle) {
+	      double p;
+	      // use either lowest proportion tag or previous proportion tag
+	      if (c.second->has_request()) {
+		p = c.second->next_request().tag.proportion +
+		  c.second->prop_delta;
+	      } else {
+	        p = c.second->get_req_tag().proportion + c.second->prop_delta;
+	      }
+
+	      if (p < lowest_prop_tag) {
+		lowest_prop_tag = p;
+	      }
+	    }
+	  }
+
+	  // if no non-idle client was found, prop_delta keeps its old value
+	  if (lowest_prop_tag < lowest_prop_tag_trigger) {
+	    client.prop_delta = lowest_prop_tag - time;
+	  }
+	  client.idle = false;
+	} // if this client was idle
+
+#ifndef DO_NOT_DELAY_TAG_CALC
+	RequestTag tag(0, 0, 0, time);
+
+	if (!client.has_request()) {
+	  tag = RequestTag(client.get_req_tag(), client.info,
+			   req_params, time, cost);
+
+	  // copy tag to previous tag for client
+	  client.update_req_tag(tag, tick);
+	}
+#else
+	RequestTag tag(client.get_req_tag(), client.info, req_params, time, cost);
+	// copy tag to previous tag for client
+	client.update_req_tag(tag, tick);
+#endif
+
+	client.add_request(tag, client.client, std::move(request));
+	if (1 == client.requests.size()) {
+	  // NB: can the following 4 calls to adjust be changed to
+	  // promote? Can adding a request ever demote a client in the
+	  // heaps?
+	  resv_heap.adjust(client);
+	  limit_heap.adjust(client);
+	  ready_heap.adjust(client);
+#if USE_PROP_HEAP
+	  prop_heap.adjust(client);
+#endif
+	}
+
+	client.cur_rho = req_params.rho;
+	client.cur_delta = req_params.delta;
+	// NB: these repeat the adjusts above when this was the first request
+	resv_heap.adjust(client);
+	limit_heap.adjust(client);
+	ready_heap.adjust(client);
+#if USE_PROP_HEAP
+	prop_heap.adjust(client);
+#endif
+      } // do_add_request
+
+
+      // data_mtx should be held when called; top of heap should have
+      // a ready request, which is popped and then handed to process
+      template<typename C1, IndIntruHeapData ClientRec::*C2, typename C3>
+      void pop_process_request(IndIntruHeap<C1, ClientRec, C2, C3, B>& heap,
+			       std::function<void(const C& client,
+						  RequestRef& request)> process) {
+	ClientRec& top = heap.top();
+	RequestRef request = std::move(top.next_request().request);
+#ifndef DO_NOT_DELAY_TAG_CALC
+	// copy the tag: pop_request() destroys the element it lives in
+	const RequestTag popped_tag = top.next_request().tag;
+#endif
+	// pop request and adjust heaps
+	top.pop_request();
+
+#ifndef DO_NOT_DELAY_TAG_CALC
+	if (top.has_request()) {
+	  ClientReq& next_first = top.next_request();
+	  next_first.tag = RequestTag(popped_tag, top.info,
+				      ReqParams(top.cur_delta, top.cur_rho),
+				      next_first.tag.arrival);
+	  // copy tag to previous tag for client
+	  top.update_req_tag(next_first.tag, tick);
+	}
+#endif
+
+	resv_heap.demote(top);
+	limit_heap.adjust(top);
+#if USE_PROP_HEAP
+	prop_heap.demote(top);
+#endif
+	ready_heap.demote(top);
+
+	// process
+	process(top.client, request);
+      } // pop_process_request
+
+
+      // data_mtx should be held when called
+      void reduce_reservation_tags(ClientRec& client) {
+	const double step = client.info.reservation_inv;
+#ifndef DO_NOT_DELAY_TAG_CALC
+	// only the front tag is reduced; the tags behind it are not valid yet
+	if (client.has_request())
+	  client.next_request().tag.reservation -= step;
+#else
+	for (auto& r : client.requests) { r.tag.reservation -= step; }
+#endif
+	// don't forget to update previous tag
+	client.prev_tag.reservation -= step;
+	resv_heap.promote(client);
+      }
+
+
+      // data_mtx should be held when called
+      void reduce_reservation_tags(const C& client_id) {
+	const auto found = client_map.find(client_id);
+
+	// a miss means the client was cleaned from the map, which should
+	// never happen as long as cleaning times are long enough
+	assert(found != client_map.end());
+	reduce_reservation_tags(*found->second);
+      }
+
+
+      // data_mtx should be held when called; picks what can be served at now
+      NextReq do_next_request(Time now) {
+	NextReq result;
+
+	// if reservation queue is empty, all are empty (i.e., no active clients)
+	if(resv_heap.empty()) {
+	  result.type = NextReqType::none;
+	  return result;
+	}
+
+	// try constraint (reservation) based scheduling
+
+	auto& reserv = resv_heap.top();
+	if (reserv.has_request() &&
+	    reserv.next_request().tag.reservation <= now) {
+	  result.type = NextReqType::returning;
+	  result.heap_id = HeapId::reservation;
+	  return result;
+	}
+
+	// no existing reservations before now, so try weight-based
+	// scheduling
+
+	// all items that are within limit are eligible based on
+	// priority
+	auto limits = &limit_heap.top();
+	while (limits->has_request() &&
+	       !limits->next_request().tag.ready &&
+	       limits->next_request().tag.limit <= now) {
+	  limits->next_request().tag.ready = true;
+	  ready_heap.promote(*limits);
+	  limit_heap.demote(*limits);
+
+	  limits = &limit_heap.top();
+	}
+	// ready_heap puts ready requests first, ordered by proportion tag
+	auto& readys = ready_heap.top();
+	if (readys.has_request() &&
+	    readys.next_request().tag.ready &&
+	    readys.next_request().tag.proportion < max_tag) {
+	  result.type = NextReqType::returning;
+	  result.heap_id = HeapId::ready;
+	  return result;
+	}
+
+	// if nothing is schedulable by reservation or
+	// proportion/weight, and if we allow limit break, try to
+	// schedule something with the lowest proportion tag or
+	// alternatively lowest reservation tag.
+	if (allow_limit_break) {
+	  if (readys.has_request() &&
+	      readys.next_request().tag.proportion < max_tag) {
+	    result.type = NextReqType::returning;
+	    result.heap_id = HeapId::ready;
+	    return result;
+	  } else if (reserv.has_request() &&
+		     reserv.next_request().tag.reservation < max_tag) {
+	    result.type = NextReqType::returning;
+	    result.heap_id = HeapId::reservation;
+	    return result;
+	  }
+	}
+
+	// nothing scheduled; make sure we re-run when next
+	// reservation item or next limited item comes up
+
+	Time next_call = TimeMax;
+	if (resv_heap.top().has_request()) {
+	  next_call =
+	    min_not_0_time(next_call,
+			   resv_heap.top().next_request().tag.reservation);
+	}
+	if (limit_heap.top().has_request()) {
+	  const auto& next = limit_heap.top().next_request();
+	  assert(!next.tag.ready || max_tag == next.tag.proportion);
+	  next_call = min_not_0_time(next_call, next.tag.limit);
+	}
+	if (next_call < TimeMax) {
+	  result.type = NextReqType::future;
+	  result.when_ready = next_call;
+	  return result;
+	} else {
+	  result.type = NextReqType::none;
+	  return result;
+	}
+      } // do_next_request
+
+
+      // returns the smaller of current and possible, except that a
+      // possible equal to TimeZero is ignored and current is returned;
+      // i.e., this finds the minimal time while disregarding zero
+      static inline const Time& min_not_0_time(const Time& current,
+					       const Time& possible) {
+	return TimeZero != possible ? std::min(current, possible) : current;
+      }
+
+
+      /*
+       * This is being called regularly by RunEvery. Every time it's
+       * called it notes the time and current tick (mark point) in a
+       * deque. Mark points at least erase_age old are consumed and the
+       * newest of them becomes the erase point; the newest remaining
+       * one at least idle_age old is the idle point. Client records
+       * whose last_tick is at or before the erase point are removed;
+       * those at or before the idle point are marked idle.
+       */
+      void do_clean() {
+	TimePoint now = std::chrono::steady_clock::now();
+	DataGuard g(data_mtx);
+	clean_mark_points.emplace_back(MarkPoint(now, tick));
+
+	// first erase the super-old client records
+
+	Counter erase_point = 0;
+	while (!clean_mark_points.empty() &&
+	       clean_mark_points.front().first <= now - erase_age) {
+	  erase_point = clean_mark_points.front().second;
+	  clean_mark_points.pop_front();
+	}
+
+	Counter idle_point = 0;
+	for (const auto& i : clean_mark_points) {
+	  if (i.first <= now - idle_age) {
+	    idle_point = i.second;
+	  } else {
+	    break;
+	  }
+	}
+
+	if (erase_point > 0 || idle_point > 0) {
+	  for (auto i = client_map.begin(); i != client_map.end(); /* empty */) {
+	    auto i2 = i++;
+	    if (erase_point && i2->second->last_tick <= erase_point) {
+	      delete_from_heaps(i2->second);
+	      client_map.erase(i2);
+	    } else if (idle_point && i2->second->last_tick <= idle_point) {
+	      i2->second->idle = true;
+	    }
+	  } // for
+	} // if
+      } // do_clean
+
+
+      // data_mtx must be held by caller
+      template<IndIntruHeapData ClientRec::*C1,typename C2>
+      void delete_from_heap(ClientRecRef& client,
+			    c::IndIntruHeap<ClientRecRef,ClientRec,C1,C2,B>& heap) {
+	auto i = heap.rfind(client);
+	heap.remove(i);
+      }
+
+
+      // data_mtx must be held by caller
+      void delete_from_heaps(ClientRecRef& client) {
+	delete_from_heap(client, resv_heap);
+#if USE_PROP_HEAP
+	delete_from_heap(client, prop_heap);
+#endif
+	delete_from_heap(client, limit_heap);
+	delete_from_heap(client, ready_heap);
+      }
+    }; // class PriorityQueueBase
+
+
+    template<typename C, typename R, uint B=2>
+    class PullPriorityQueue : public PriorityQueueBase<C,R,B> {
+      using super = PriorityQueueBase<C,R,B>;
+
+    public:
+
+      // When a request is pulled, this is the return type.
+      struct PullReq {
+	struct Retn {
+	  C                           client;
+	  typename super::RequestRef  request;
+	  PhaseType                   phase;
+	};
+
+	typename super::NextReqType   type;
+	boost::variant<Retn,Time>     data;
+
+	bool is_none() const { return type == super::NextReqType::none; }
+
+	bool is_retn() const { return type == super::NextReqType::returning; }
+	Retn& get_retn() {
+	  return boost::get<Retn>(data);
+	}
+
+	bool is_future() const { return type == super::NextReqType::future; }
+	Time getTime() const { return boost::get<Time>(data); }
+      };
+
+
+#ifdef PROFILE
+      ProfileTimer<std::chrono::nanoseconds> pull_request_timer;
+      ProfileTimer<std::chrono::nanoseconds> add_request_timer;
+#endif
+
+      template<typename Rep, typename Per>
+      PullPriorityQueue(typename super::ClientInfoFunc _client_info_f,
+			std::chrono::duration<Rep,Per> _idle_age,
+			std::chrono::duration<Rep,Per> _erase_age,
+			std::chrono::duration<Rep,Per> _check_time,
+			bool _allow_limit_break = false) :
+	super(_client_info_f,
+	      _idle_age, _erase_age, _check_time,
+	      _allow_limit_break)
+      {
+	// empty
+      }
+
+
+      // pull convenience constructor
+      PullPriorityQueue(typename super::ClientInfoFunc _client_info_f,
+			bool _allow_limit_break = false) :
+	PullPriorityQueue(_client_info_f,
+			  std::chrono::minutes(10),
+			  std::chrono::minutes(15),
+			  std::chrono::minutes(6),
+			  _allow_limit_break)
+      {
+	// empty
+      }
+
+
+      inline void add_request(const R& request,
+			      const C& client_id,
+			      const ReqParams& req_params,
+			      double addl_cost = 0.0) {
+	add_request(typename super::RequestRef(new R(request)),
+		    client_id,
+		    req_params,
+		    get_time(),
+		    addl_cost);
+      }
+
+
+      inline void add_request(const R& request,
+			      const C& client_id,
+			      double addl_cost = 0.0) {
+	static const ReqParams null_req_params;
+	add_request(typename super::RequestRef(new R(request)),
+		    client_id,
+		    null_req_params,
+		    get_time(),
+		    addl_cost);
+      }
+
+
+
+      inline void add_request_time(const R& request,
+				   const C& client_id,
+				   const ReqParams& req_params,
+				   const Time time,
+				   double addl_cost = 0.0) {
+	add_request(typename super::RequestRef(new R(request)),
+		    client_id,
+		    req_params,
+		    time,
+		    addl_cost);
+      }
+
+
+      inline void add_request(typename super::RequestRef&& request,
+			      const C& client_id, const ReqParams& req_params,
+			      double addl_cost = 0.0) {
+	// stamp with the current time; order matches the five-parameter overload
+	add_request(std::move(request), client_id, req_params, get_time(), addl_cost);
+      }
+
+
+      inline void add_request(typename super::RequestRef&& request,
+			      const C& client_id, double addl_cost = 0.0) {
+	static const ReqParams null_req_params; // used as no params were given
+	add_request(std::move(request), client_id, null_req_params,
+		    get_time(), addl_cost);
+      }
+
+
+      // this does the work; the versions above provide alternate interfaces
+      void add_request(typename super::RequestRef&& request,
+		       const C&                     client_id,
+		       const ReqParams&             req_params,
+		       const Time                   time,
+		       double                       addl_cost = 0.0) {
+	typename super::DataGuard g(this->data_mtx);
+#ifdef PROFILE
+	add_request_timer.start();
+#endif
+	super::do_add_request(std::move(request),
+			      client_id,
+			      req_params,
+			      time,
+			      addl_cost);
+	// no call to schedule_request for pull version
+#ifdef PROFILE
+	add_request_timer.stop();
+#endif
+      }
+
+
+      inline PullReq pull_request() {
+	return pull_request(get_time());
+      }
+
+
+      PullReq pull_request(Time now) {
+	PullReq result;
+	typename super::DataGuard g(this->data_mtx);
+#ifdef PROFILE
+	pull_request_timer.start();
+#endif
+
+	typename super::NextReq next = super::do_next_request(now);
+	result.type = next.type;
+	switch(next.type) {
+	case super::NextReqType::none:
+	  return result;
+	  break;
+	case super::NextReqType::future:
+	  result.data = next.when_ready;
+	  return result;
+	  break;
+	case super::NextReqType::returning:
+	  // to avoid nesting, break out and let code below handle this case
+	  break;
+	default:
+	  assert(false);
+	}
+
+	// we'll only get here if we're returning an entry
+
+	auto process_f =
+	  [&] (PullReq& pull_result, PhaseType phase) ->
+	  std::function<void(const C&,
+			     typename super::RequestRef&)> {
+	  return [&pull_result, phase](const C& client,
+				       typename super::RequestRef& request) {
+	    pull_result.data =
+	    typename PullReq::Retn{client, std::move(request), phase};
+	  };
+	};
+
+	switch(next.heap_id) {
+	case super::HeapId::reservation:
+	  super::pop_process_request(this->resv_heap,
+				     process_f(result, PhaseType::reservation));
+	  ++this->reserv_sched_count;
+	  break;
+	case super::HeapId::ready:
+	  super::pop_process_request(this->ready_heap,
+				     process_f(result, PhaseType::priority));
+	  { // need to use retn temporarily
+	    auto& retn = boost::get<typename PullReq::Retn>(result.data);
+	    super::reduce_reservation_tags(retn.client);
+	  }
+	  ++this->prop_sched_count;
+	  break;
+	default:
+	  assert(false);
+	}
+
+#ifdef PROFILE
+	pull_request_timer.stop();
+#endif
+	return result;
+      } // pull_request
+
+
+    protected:
+
+
+      // data_mtx should be held when called; unfortunately this
+      // function has to be repeated in both push & pull
+      // specializations
+      typename super::NextReq next_request() {
+	return super::do_next_request(get_time()); // as pull_request(Time) does
+      }
+    }; // class PullPriorityQueue
+
+
+    // PUSH version
+    template<typename C, typename R, uint B=2>
+    class PushPriorityQueue : public PriorityQueueBase<C,R,B> {
+
+    protected:
+
+      using super = PriorityQueueBase<C,R,B>;
+
+    public:
+
+      // a function to see whether the server can handle another request
+      using CanHandleRequestFunc = std::function<bool(void)>;
+
+      // a function to submit a request to the server; the second
+      // parameter is a callback when it's completed
+      using HandleRequestFunc =
+	std::function<void(const C&,typename super::RequestRef,PhaseType)>;
+
+    protected:
+
+      CanHandleRequestFunc can_handle_f;
+      HandleRequestFunc    handle_f;
+      // for handling timed scheduling
+      std::mutex  sched_ahead_mtx;
+      std::condition_variable sched_ahead_cv;
+      Time sched_ahead_when = TimeZero;
+
+#ifdef PROFILE
+    public:
+      ProfileTimer<std::chrono::nanoseconds> add_request_timer;
+      ProfileTimer<std::chrono::nanoseconds> request_complete_timer;
+    protected:
+#endif
+
+      // NB: threads declared last, so constructed last and destructed first
+
+      std::thread sched_ahead_thd;
+
+    public:
+
+      // push full constructor
+      template<typename Rep, typename Per>
+      PushPriorityQueue(typename super::ClientInfoFunc _client_info_f,
+			CanHandleRequestFunc _can_handle_f,
+			HandleRequestFunc _handle_f,
+			std::chrono::duration<Rep,Per> _idle_age,
+			std::chrono::duration<Rep,Per> _erase_age,
+			std::chrono::duration<Rep,Per> _check_time,
+			bool _allow_limit_break = false) :
+	super(_client_info_f,
+	      _idle_age, _erase_age, _check_time,
+	      _allow_limit_break)
+      {
+	can_handle_f = _can_handle_f;
+	handle_f = _handle_f;
+	sched_ahead_thd = std::thread(&PushPriorityQueue::run_sched_ahead, this);
+      }
+
+
+      // push convenience constructor
+      PushPriorityQueue(typename super::ClientInfoFunc _client_info_f,
+			CanHandleRequestFunc _can_handle_f,
+			HandleRequestFunc _handle_f,
+			bool _allow_limit_break = false) :
+	PushPriorityQueue(_client_info_f,
+			  _can_handle_f,
+			  _handle_f,
+			  std::chrono::minutes(10),
+			  std::chrono::minutes(15),
+			  std::chrono::minutes(6),
+			  _allow_limit_break)
+      {
+	// empty
+      }
+
+
+      ~PushPriorityQueue() {
+	{ std::lock_guard<std::mutex> l(sched_ahead_mtx); this->finishing = true; }
+	sched_ahead_cv.notify_one(); // flag was set under the mutex: no lost wake-up
+	sched_ahead_thd.join();
+      }
+
+    public:
+
+      inline void add_request(const R& request,
+			      const C& client_id,
+			      const ReqParams& req_params,
+			      double addl_cost = 0.0) {
+	add_request(typename super::RequestRef(new R(request)),
+		    client_id,
+		    req_params,
+		    get_time(),
+		    addl_cost);
+      }
+
+
+      inline void add_request(typename super::RequestRef&& request,
+			      const C& client_id, const ReqParams& req_params,
+			      double addl_cost = 0.0) {
+	// stamp with the current time; order matches the five-parameter overload
+	add_request(std::move(request), client_id, req_params, get_time(), addl_cost);
+      }
+
+
+      inline void add_request_time(const R& request,
+				   const C& client_id,
+				   const ReqParams& req_params,
+				   const Time time,
+				   double addl_cost = 0.0) {
+	add_request(typename super::RequestRef(new R(request)),
+		    client_id,
+		    req_params,
+		    time,
+		    addl_cost);
+      }
+
+
+      void add_request(typename super::RequestRef&& request,
+		       const C&         client_id,
+		       const ReqParams& req_params,
+		       const Time       time,
+		       double           addl_cost = 0.0) {
+	typename super::DataGuard g(this->data_mtx);
+#ifdef PROFILE
+	add_request_timer.start();
+#endif
+	super::do_add_request(std::move(request),
+			      client_id,
+			      req_params,
+			      time,
+			      addl_cost);
+	schedule_request();
+#ifdef PROFILE
+	add_request_timer.stop();
+#endif
+      }
+
+
+      void request_completed() {
+	typename super::DataGuard g(this->data_mtx);
+#ifdef PROFILE
+	request_complete_timer.start();
+#endif
+	schedule_request();
+#ifdef PROFILE
+	request_complete_timer.stop();
+#endif
+      }
+
+    protected:
+
+      // data_mtx should be held when called; furthermore, the heap
+      // should not be empty and the top element of the heap should
+      // not be already handled
+      //
+      // NOTE: the use of "super::ClientRec" in either the template
+      // construct or as a parameter to submit_top_request generated
+      // a compiler error in g++ 4.8.4, when ClientRec was
+      // "protected" rather than "public". By g++ 6.3.1 this was not
+      // an issue. But for backwards compatibility
+      // PriorityQueueBase::ClientRec is public.
+      template<typename C1,
+	       IndIntruHeapData super::ClientRec::*C2,
+	       typename C3,
+	       uint B4>
+      C submit_top_request(IndIntruHeap<C1,typename super::ClientRec,C2,C3,B4>& heap,
+			   PhaseType phase) {
+	C client_result;
+	super::pop_process_request(heap,
+				   [this, phase, &client_result]
+				   (const C& client,
+				    typename super::RequestRef& request) {
+				     client_result = client;
+				     handle_f(client, std::move(request), phase);
+				   });
+	return client_result;
+      }
+
+
+      // data_mtx should be held when called
+      void submit_request(typename super::HeapId heap_id) {
+	C client;
+	switch(heap_id) {
+	case super::HeapId::reservation:
+	  // don't need to note client
+	  (void) submit_top_request(this->resv_heap, PhaseType::reservation);
+	  // unlike the other two cases, we do not reduce reservation
+	  // tags here
+	  ++this->reserv_sched_count;
+	  break;
+	case super::HeapId::ready:
+	  client = submit_top_request(this->ready_heap, PhaseType::priority);
+	  super::reduce_reservation_tags(client);
+	  ++this->prop_sched_count;
+	  break;
+	default:
+	  assert(false);
+	}
+      } // submit_request
+
+
+      // data_mtx should be held when called; unfortunately this
+      // function has to be repeated in both push & pull
+      // specializations
+      typename super::NextReq next_request() {
+	return next_request(get_time());
+      }
+
+
+      // data_mtx should be held when called; overrides member
+      // function in base class to add check for whether a request can
+      // be pushed to the server
+      typename super::NextReq next_request(Time now) {
+	if (!can_handle_f()) {
+	  typename super::NextReq result;
+	  result.type = super::NextReqType::none;
+	  return result;
+	} else {
+	  return super::do_next_request(now);
+	}
+      } // next_request
+
+
+      // data_mtx should be held when called
+      void schedule_request() {
+	typename super::NextReq next_req = next_request();
+	switch (next_req.type) {
+	case super::NextReqType::none:
+	  return;
+	case super::NextReqType::future:
+	  sched_at(next_req.when_ready);
+	  break;
+	case super::NextReqType::returning:
+	  submit_request(next_req.heap_id);
+	  break;
+	default:
+	  assert(false);
+	}
+      }
+
+
+      // this is the thread that handles running schedule_request at
+      // future times when nothing can be scheduled immediately
+      void run_sched_ahead() {
+	std::unique_lock<std::mutex> l(sched_ahead_mtx);
+
+	while (!this->finishing) {
+	  if (TimeZero == sched_ahead_when) {
+	    sched_ahead_cv.wait(l);
+	  } else {
+	    Time now;
+	    while (!this->finishing && (now = get_time()) < sched_ahead_when) {
+	      long microseconds_l = long(1 + 1000000 * (sched_ahead_when - now));
+	      auto microseconds = std::chrono::microseconds(microseconds_l);
+	      sched_ahead_cv.wait_for(l, microseconds);
+	    }
+	    sched_ahead_when = TimeZero;
+	    if (this->finishing) return;
+
+	    l.unlock();
+	    if (!this->finishing) {
+	      typename super::DataGuard g(this->data_mtx);
+	      schedule_request();
+	    }
+	    l.lock();
+	  }
+	}
+      }
+
+
+      void sched_at(Time when) {
+	std::lock_guard<std::mutex> l(sched_ahead_mtx);
+	if (TimeZero == sched_ahead_when || when < sched_ahead_when) {
+	  sched_ahead_when = when;
+	  sched_ahead_cv.notify_one();
+	}
+      }
+    }; // class PushPriorityQueue
+
+  } // namespace dmclock
+} // namespace crimson
diff --git a/src/dmclock_util.cc b/src/dmclock_util.cc
new file mode 100644
index 00000000000..865b60d42a8
--- /dev/null
+++ b/src/dmclock_util.cc
@@ -0,0 +1,27 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2017 Red Hat Inc.
+ */
+
+
+#include <signal.h>
+
+#include <iomanip>
+#include <sstream>
+
+#include "dmclock_util.h"
+
+
+std::string crimson::dmclock::format_time(const Time& time, uint modulo) {
+  long subtract = long(time / modulo) * modulo;
+  std::stringstream ss;
+  ss << std::fixed << std::setprecision(4) << (time - subtract);
+  return ss.str();
+}
+
+
+void crimson::dmclock::debugger() {
+  raise(SIGCONT);
+}
diff --git a/src/dmclock_util.h b/src/dmclock_util.h
new file mode 100644
index 00000000000..d12c6f9eb63
--- /dev/null
+++ b/src/dmclock_util.h
@@ -0,0 +1,45 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2017 Red Hat Inc.
+ */
+
+
+#pragma once
+
+
+#include <unistd.h>
+#include <assert.h>
+#include <sys/time.h>
+
+#include <limits>
+#include <cmath>
+#include <chrono>
+
+
+namespace crimson {
+  namespace dmclock {
+    // we're using double to represent time, but we could change it by
+    // changing the following declarations (and by making sure a min
+    // function existed)
+    using Time = double;
+    static const Time TimeZero = 0.0;
+    static const Time TimeMax = std::numeric_limits<Time>::max();
+    static const double NaN = nan("");
+
+
+    inline Time get_time() {
+      struct timeval now;
+      auto result = gettimeofday(&now, NULL);
+      (void) result;
+      assert(0 == result);
+      return now.tv_sec + (now.tv_usec / 1000000.0);
+    }
+
+    std::string format_time(const Time& time, uint modulo = 1000);
+
+    void debugger();
+
+  } // namespace dmclock
+} // namespace crimson
diff --git a/support/CMakeLists.txt b/support/CMakeLists.txt
new file mode 100644
index 00000000000..552439ebc59
--- /dev/null
+++ b/support/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(test)
diff --git a/support/src/debug.h b/support/src/debug.h
new file mode 100644
index 00000000000..2a78cc82309
--- /dev/null
+++ b/support/src/debug.h
@@ -0,0 +1,17 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#pragma once
+
+
+#include <signal.h>
+
+
+inline void debugger() {
+    raise(SIGCONT);
+}
diff --git a/support/src/heap.h b/support/src/heap.h
new file mode 100644
index 00000000000..0f4d24f7c2d
--- /dev/null
+++ b/support/src/heap.h
@@ -0,0 +1,240 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#pragma once
+
+
+#include <vector>
+#include <ostream>
+
+#include "assert.h"
+
+
+namespace crimson {
+
+  /*
+   * T : type of data held in the heap.
+   *
+   * C : class that implements operator() with two arguments and
+   * returns a boolean when the first argument is greater than (higher
+   * in priority than) the second.
+   */
+  template<typename T, typename C>
+  class Heap {
+
+  public:
+
+    class iterator {
+
+      friend Heap<T,C>;
+
+      Heap<T,C>& heap;
+      int        index;
+
+      iterator(Heap<T,C>& _heap, int _index) :
+	heap(_heap),
+	index(_index)
+      {
+	// empty
+      }
+
+    public:
+
+      iterator(iterator&& other) :
+	heap(other.heap),
+	index(other.index)
+      {
+	// empty
+      }
+
+      iterator& operator++() {
+	++index;
+	return *this;
+      }
+
+      bool operator==(const iterator& other) const {
+	return index == other.index;
+      }
+
+      bool operator!=(const iterator& other) const {
+	return !(*this == other);
+      }
+
+      T& operator*() {
+	return heap.data[index];
+      }
+
+      // the item this iterator refers to
+      void increase() {
+	heap.siftUp(index);
+      }
+    }; // class iterator
+
+    friend iterator;
+
+  protected:
+
+    std::vector<T> data;
+    int count;
+    C comparator;
+
+    // NB: integer division truncates toward zero, so parent(0) is 0, not
+    // a negative value; siftUp never asks for it as it stops at index 0
+    static inline int parent(int i) { return (i - 1) / 2; }
+
+    static inline int lhs(int i) { return 2*i + 1; }
+
+    static inline int rhs(int i) { return 2*i + 2; }
+
+    void siftUp(int i) {
+      assert(i < count);
+
+      while (i > 0) {
+	int pi = parent(i);
+	if (!comparator(data[i], data[pi])) {
+	  break;
+	}
+
+	std::swap(data[i], data[pi]);
+	i = pi;
+      }
+    }
+
+    void siftDown(int i) {
+      while (i < count) {
+	int li = lhs(i);
+	int ri = rhs(i);
+
+	if (li < count) {
+	  if (comparator(data[li], data[i])) {
+	    if (ri < count && comparator(data[ri], data[li])) {
+	      std::swap(data[i], data[ri]);
+	      i = ri;
+	    } else {
+	      std::swap(data[i], data[li]);
+	      i = li;
+	    }
+	  } else if (ri < count && comparator(data[ri], data[i])) {
+	    std::swap(data[i], data[ri]);
+	    i = ri;
+	  } else {
+	    break;
+	  }
+	} else {
+	  break;
+	}
+      }
+    }
+
+
+  public:
+
+    Heap() :
+      count(0)
+    {
+      // empty
+    }
+
+    Heap(const Heap<T,C>& other) {
+      data.resize(other.data.size());
+      for (int i = 0; i < other.count; ++i) {
+	data[i] = other.data[i];
+      }
+      count = other.count;
+    }
+
+    const Heap<T,C>& operator=(const Heap<T,C>& other) {
+      data.resize(other.data.size());
+      for (int i = 0; i < other.count; ++i) {
+	data[i] = other.data[i];
+      }
+      count = other.count;
+      return *this;
+    }
+
+    bool empty() const { return 0 == count; }
+
+    T& top() { return data[0]; }
+
+    void push(T item) {
+      int i = count++;
+      data.push_back(item);
+      siftUp(i);
+    }
+
+    void pop() {
+      data[0] = data[--count];
+      data.resize(count);
+      siftDown(0);
+    }
+
+    void updateTop() {
+      siftDown(0);
+    }
+
+    void clear() {
+      count = 0;
+      data.resize(0);
+    }
+
+    iterator begin() {
+      return iterator(*this, 0);
+    }
+
+    iterator end() {
+      return iterator(*this, count);
+    }
+
+    std::ostream& displaySorted(std::ostream& out,
+				std::function<bool(const T&)> filter,
+				bool insert_line_breaks = true) const {
+      Heap<T,C> temp = *this;
+
+      bool first = true;
+      out << "[ ";
+
+      while(!temp.empty()) {
+	const T& top = temp.top();
+	if (filter(top)) {
+	  if (!first) {
+	    out << ", ";
+	  }
+	  if (insert_line_breaks) {
+	    out << std::endl << "    ";
+	  }
+	  out << temp.top();
+	  first = false;
+	}
+	temp.pop();
+      }
+
+      out << " ]";
+      if (insert_line_breaks) {
+	out << std::endl;
+      }
+      return out;
+    }
+
+    template<typename T1, typename T2>
+    friend std::ostream& operator<<(std::ostream&, const Heap<T1,T2>&);
+  }; // class Heap
+
+
+  template<typename T1, typename T2>
+  std::ostream& operator<<(std::ostream& out, const Heap<T1,T2>& h) {
+    out << "[ ";
+    if (h.count) {
+      out << h.data[0];
+    }
+    for (int i = 1; i < h.count; i++) {
+      out << ", " << h.data[i];
+    }
+    out << " ]";
+    return out;
+  }
+} // namespace
diff --git a/support/src/indirect_intrusive_heap.h b/support/src/indirect_intrusive_heap.h
new file mode 100644
index 00000000000..b6075bda22f
--- /dev/null
+++ b/support/src/indirect_intrusive_heap.h
@@ -0,0 +1,549 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#pragma once
+
+
+#include <memory>
+#include <vector>
+#include <string>
+#include <iostream>
+#include <functional>
+#include <algorithm>
+
+#include "assert.h"
+
+
+namespace crimson {
+  using IndIntruHeapData = size_t;
+
+  /* T is the ultimate data that's being stored in the heap, although
+   *   through indirection.
+   *
+   * I is the indirect type that will actually be stored in the heap
+   *   and that must allow dereferencing (via operator*) to yield a
+   *   T&.
+   *
+   * C is a functor when given two T&'s will return true if the first
+   *   must precede the second.
+   *
+   * heap_info is a data member pointer as to where the heap data in T
+   * is stored.
+   *
+   * K is the branching factor of the heap, default is 2 (binary heap).
+   */
+  template<typename I,
+	   typename T,
+	   IndIntruHeapData T::*heap_info,
+	   typename C,
+	   uint K = 2>
+  class IndIntruHeap {
+
+    // shorthand
+    using HeapIndex = IndIntruHeapData;
+
+    static_assert(
+      std::is_same<T,typename std::pointer_traits<I>::element_type>::value,
+      "class I must resolve to class T by indirection (pointer dereference)");
+
+    static_assert(
+      std::is_same<bool,
+      typename std::result_of<C(const T&,const T&)>::type>::value,
+      "class C must define operator() to take two const T& and return a bool");
+
+    static_assert(K >= 2, "K (degree of branching) must be at least 2");
+
+    // Position within an IndIntruHeap, identified by the owning heap and
+    // an index into its data vector; index == count is the end position.
+    class Iterator {
+      friend IndIntruHeap<I, T, heap_info, C, K>;
+
+      // held by pointer rather than by reference so that assigning an
+      // iterator rebinds it instead of assigning through to the heap
+      IndIntruHeap<I, T, heap_info, C, K>* heap;
+      HeapIndex                            index;
+
+      Iterator(IndIntruHeap<I, T, heap_info, C, K>& _heap, HeapIndex _index) :
+	heap(&_heap),
+	index(_index)
+      {
+	// empty
+      }
+
+    public:
+
+      // both members are trivially copyable, so the compiler-generated
+      // constructors do exactly what the hand-written ones did
+      Iterator(Iterator&& other) = default;
+      Iterator(const Iterator& other) = default;
+
+      // exchanges heap and position with other
+      Iterator& operator=(Iterator&& other) {
+	std::swap(heap, other.heap);
+	std::swap(index, other.index);
+	return *this;
+      }
+
+      // rebinds this iterator to other's heap and position
+      Iterator& operator=(const Iterator& other) {
+	heap = other.heap;
+	index = other.index;
+	return *this;
+      }
+
+      // advances by one but never moves past the end position
+      Iterator& operator++() {
+	if (index < heap->count) {
+	  ++index;
+	}
+	return *this;
+      }
+
+      bool operator==(const Iterator& other) const {
+	return heap == other.heap && index == other.index;
+      }
+
+      bool operator!=(const Iterator& other) const {
+	return !(*this == other);
+      }
+
+      // dereferencing is only valid while index < heap->count
+      T& operator*() {
+	return *heap->data[index];
+      }
+
+      T* operator->() {
+	return &(*heap->data[index]);
+      }
+
+#if 0
+      // the item this iterator refers to
+      void increase() {
+	heap->sift_up(index);
+      }
+#endif
+    }; // class Iterator
+
+
+    // Read-only counterpart of Iterator: a heap plus an index into its
+    // data vector, where index == count is the end position.
+    class ConstIterator {
+      friend IndIntruHeap<I, T, heap_info, C, K>;
+
+      // held by pointer rather than by reference so that assigning an
+      // iterator rebinds it instead of assigning through to the heap
+      const IndIntruHeap<I, T, heap_info, C, K>* heap;
+      HeapIndex                                  index;
+
+      // only the heap itself (a friend) creates iterators from scratch
+      ConstIterator(const IndIntruHeap<I, T, heap_info, C, K>& _heap,
+		    HeapIndex _index) :
+	heap(&_heap),
+	index(_index)
+      {
+	// empty
+      }
+
+    public:
+
+      // member-wise copies are all that is needed for a pointer and an index
+      ConstIterator(ConstIterator&& other) = default;
+      ConstIterator(const ConstIterator& other) = default;
+
+      // exchanges heap and position with other
+      ConstIterator& operator=(ConstIterator&& other) {
+	std::swap(heap, other.heap);
+	std::swap(index, other.index);
+	return *this;
+      }
+
+      // rebinds this iterator to other's heap and position
+      ConstIterator& operator=(const ConstIterator& other) {
+	heap = other.heap;
+	index = other.index;
+	return *this;
+      }
+
+      // advances by one but never moves past the end position
+      ConstIterator& operator++() {
+	if (index < heap->count) {
+	  ++index;
+	}
+	return *this;
+      }
+
+      bool operator==(const ConstIterator& other) const {
+	return heap == other.heap && index == other.index;
+      }
+
+      bool operator!=(const ConstIterator& other) const {
+	return !(*this == other);
+      }
+
+      // dereferencing is only valid while index < heap->count
+      const T& operator*() {
+	return *heap->data[index];
+      }
+
+      const T* operator->() {
+	return &(*heap->data[index]);
+      }
+    }; // class ConstIterator
+
+
+  protected:
+
+    std::vector<I> data;
+    HeapIndex      count;
+    C              comparator;
+
+  public:
+
+    IndIntruHeap() :
+      count(0)
+    {
+      // empty
+    }
+
+    IndIntruHeap(const IndIntruHeap<I,T,heap_info,C,K>& other) :
+      count(other.count)
+    {
+      for (HeapIndex i = 0; i < other.count; ++i) {
+	data.push_back(other.data[i]);
+      }
+    }
+
+    bool empty() const { return 0 == count; }
+
+    size_t size() const { return (size_t) count; }
+
+    T& top() { return *data[0]; }
+
+    const T& top() const { return *data[0]; }
+
+    I& top_ind() { return data[0]; }
+
+    const I& top_ind() const { return data[0]; }
+
+    void push(I&& item) {
+      HeapIndex i = count++;
+      intru_data_of(item) = i;
+      data.emplace_back(std::move(item));
+      sift_up(i);
+    }
+
+    void push(const I& item) {
+      I copy(item);
+      push(std::move(copy));
+    }
+
+    void pop() {
+      remove(0);
+    }
+
+    void remove(Iterator& i) {
+      remove(i.index);
+      i = end();
+    }
+
+    Iterator find(const I& ind_item) {
+      for (HeapIndex i = 0; i < count; ++i) {
+	if (data[i] == ind_item) {
+	  return Iterator(*this, i);
+	}
+      }
+      return end();
+    }
+
+    // when passing in value we do a comparison via operator==
+    Iterator find(const T& item) {
+      for (HeapIndex i = 0; i < count; ++i) {
+	if (*data[i] == item) {
+	  return Iterator(*this, i);
+	}
+      }
+      return end();
+    }
+
+    // reverse find -- start looking from bottom of heap
+    Iterator rfind(const I& ind_item) {
+      // HeapIndex is unsigned, so we can't allow to go negative; so
+      // we'll keep it one more than actual index
+      for (HeapIndex i = count; i > 0; --i) {
+	if (data[i-1] == ind_item) {
+	  return Iterator(*this, i-1);
+	}
+      }
+      return end();
+    }
+
+    // reverse find -- start looking from bottom of heap
+    Iterator rfind(const T& item) {
+      // HeapIndex is unsigned, so we can't allow to go negative; so
+      // we'll keep it one more than actual index
+      for (HeapIndex i = count; i > 0; --i) {
+	if (*data[i-1] == item) {
+	  return Iterator(*this, i-1);
+	}
+      }
+      return end();
+    }
+
+    ConstIterator find(const I& ind_item) const {
+      for (HeapIndex i = 0; i < count; ++i) {
+	if (data[i] == ind_item) {
+	  return ConstIterator(*this, i);
+	}
+      }
+      return cend();
+    }
+
+    // when passing in value we do a comparison via operator==
+    ConstIterator find(const T& item) const {
+      for (HeapIndex i = 0; i < count; ++i) {
+	if (*data[i] == item) {
+	  return ConstIterator(*this, i);
+	}
+      }
+      return cend();
+    }
+
+    // reverse find -- start looking from bottom of heap
+    ConstIterator rfind(const I& ind_item) const {
+      // HeapIndex is unsigned, so we can't allow to go negative; so
+      // we'll keep it one more than actual index
+      for (HeapIndex i = count; i > 0; --i) {
+	if (data[i-1] == ind_item) {
+	  return ConstIterator(*this, i-1);
+	}
+      }
+      return cend();
+    }
+
+    // reverse find -- start looking from bottom of heap
+    ConstIterator rfind(const T& item) const {
+      // HeapIndex is unsigned, so we can't allow to go negative; so
+      // we'll keep it one more than actual index
+      for (HeapIndex i = count; i > 0; --i) {
+	if (*data[i-1] == item) {
+	  return ConstIterator(*this, i-1);
+	}
+      }
+      return cend();
+    }
+
+    void promote(T& item) {
+      sift_up(item.*heap_info);
+    }
+
+    void demote(T& item) {
+      sift_down(item.*heap_info);
+    }
+
+    void adjust(T& item) {
+      sift(item.*heap_info);
+    }
+
+    Iterator begin() {
+      return Iterator(*this, 0);
+    }
+
+    Iterator end() {
+      return Iterator(*this, count);
+    }
+
+    ConstIterator cbegin() const {
+      return ConstIterator(*this, 0);
+    }
+
+    ConstIterator cend() const {
+      return ConstIterator(*this, count);
+    }
+
+    friend std::ostream& operator<<(std::ostream& out, const IndIntruHeap& h) {
+      // writes the elements in storage order, separated by ", "
+      auto i = h.data.cbegin();
+      if (i != h.data.cend()) {
+	out << **i;
+	for (++i; i != h.data.cend(); ++i) {
+	  out << ", " << **i;
+	}
+      }
+      return out;
+    }
+
+    // can only be called if I is copyable; copies heap into a vector
+    // and sorts it before displaying it
+    std::ostream&
+    display_sorted(std::ostream& out,
+		   std::function<bool(const T&)> filter = all_filter) const {
+      static_assert(std::is_copy_constructible<I>::value,
+		    "cannot call display_sorted when class I is not copy"
+		    " constructible");
+      auto compare = [this] (const I first, const I second) -> bool {
+	return this->comparator(*first, *second);
+      };
+      std::vector<I> copy(data);
+      std::sort(copy.begin(), copy.end(), compare);
+
+      bool first = true;
+      for (auto c = copy.begin(); c != copy.end(); ++c) {
+	if (filter(**c)) {
+	  if (!first) {
+	    out << ", ";
+	  } else {
+	    first = false;
+	  }
+	  out << **c;
+	}
+      }
+
+      return out;
+    }
+
+
+  protected:
+
+    static IndIntruHeapData& intru_data_of(I& item) {
+      return (*item).*heap_info;
+    }
+
+    void remove(HeapIndex i) {
+      std::swap(data[i], data[--count]);
+      intru_data_of(data[i]) = i;
+      data.pop_back();
+
+      // if the last element was the one removed, slot i no longer exists
+      // and there is nothing to re-position; otherwise this needs to be
+      // sift (and not sift_down) as the moved element can go up or down
+      // the heap; imagine the heap vector contains 0, 10, 100, 20, 30,
+      // 200, 300, 40; then 200 is removed, and 40 has to be sifted upwards
+      if (i < count) sift(i);
+    }
+
+    // default value of filter parameter to display_sorted
+    static bool all_filter(const T& data) { return true; }
+
+    // index of i's parent; i must not be 0 (the root) since i - 1 would wrap
+    static inline HeapIndex parent(HeapIndex i) {
+      assert(0 != i);
+      return (i - 1) / K;
+    }
+
+    // index of left child when K==2, index of left-most child when K>2
+    static inline HeapIndex lhs(HeapIndex i) { return K*i + 1; }
+
+    // index of right child when K==2, index of right-most child when K>2
+    static inline HeapIndex rhs(HeapIndex i) { return K*i + K; }
+
+    void sift_up(HeapIndex i) {
+      while (i > 0) {
+	HeapIndex pi = parent(i);
+	if (!comparator(*data[i], *data[pi])) {
+	  break;
+	}
+
+	std::swap(data[i], data[pi]);
+	intru_data_of(data[i]) = i;
+	intru_data_of(data[pi]) = pi;
+	i = pi;
+      }
+    } // sift_up
+
+    // use this sift_down definition when K>2; it's more general and
+    // uses a loop; EnableBool insures template uses a template
+    // parameter
+    template<bool EnableBool=true>
+    typename std::enable_if<(K>2)&&EnableBool,void>::type sift_down(HeapIndex i) {
+      if (i >= count) return;
+      while (true) {
+	HeapIndex li = lhs(i);
+
+	if (li < count) {
+	  HeapIndex ri = std::min(rhs(i), count - 1);
+
+	  // find the index of min. child
+	  HeapIndex min_i = li;
+	  for (HeapIndex k = li + 1; k <= ri; ++k) {
+	    if (comparator(*data[k], *data[min_i])) {
+	      min_i = k;
+	    }
+	  }
+
+	  if (comparator(*data[min_i], *data[i])) {
+	    std::swap(data[i], data[min_i]);
+	    intru_data_of(data[i]) = i;
+	    intru_data_of(data[min_i]) = min_i;
+	    i = min_i;
+	  } else {
+	    // no child is smaller
+	    break;
+	  }
+	} else {
+	  // no children
+	  break;
+	}
+      }
+    } // sift_down
+
+    // use this sift_down definition when K==2; EnableBool insures
+    // template uses a template parameter
+    template<bool EnableBool=true>
+    typename std::enable_if<K==2&&EnableBool,void>::type sift_down(HeapIndex i) {
+      if (i >= count) return;
+      while (true) {
+	const HeapIndex li = lhs(i);
+	const HeapIndex ri = 1 + li;
+
+        if (li < count) {
+	  if (comparator(*data[li], *data[i])) {
+	    if (ri < count && comparator(*data[ri], *data[li])) {
+	      std::swap(data[i], data[ri]);
+	      intru_data_of(data[i]) = i;
+	      intru_data_of(data[ri]) = ri;
+	      i = ri;
+	    } else {
+	      std::swap(data[i], data[li]);
+	      intru_data_of(data[i]) = i;
+	      intru_data_of(data[li]) = li;
+	      i = li;
+            }
+	  } else if (ri < count && comparator(*data[ri], *data[i])) {
+	    std::swap(data[i], data[ri]);
+            intru_data_of(data[i]) = i;
+	    intru_data_of(data[ri]) = ri;
+	    i = ri;
+          } else {
+	    // no child is smaller
+            break;
+          }
+        } else {
+	  // no children
+          break;
+        }
+      } // while
+    } // sift_down
+
+    void sift(HeapIndex i) {
+      if (i == 0) {
+	// if we're at top, can only go down
+	sift_down(i);
+      } else {
+	HeapIndex pi = parent(i);
+	if (comparator(*data[i], *data[pi])) {
+	  // if we can go up, we will
+	  sift_up(i);
+	} else {
+	  // otherwise we'll try to go down
+	  sift_down(i);
+	}
+      }
+    } // sift
+  }; // class IndIntruHeap
+
+} // namespace crimson
diff --git a/support/src/intrusive_heap.h b/support/src/intrusive_heap.h
new file mode 100644
index 00000000000..291e5798149
--- /dev/null
+++ b/support/src/intrusive_heap.h
@@ -0,0 +1,214 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#pragma once
+
+
+#include <vector>
+#include <string>
+#include <iostream>
+#include <functional>
+
+#include "assert.h"
+
+
+namespace crimson {
+  using IntruHeapData = size_t;
+
+  // T = type of data in heap; I = functor that returns a non-const
+  // reference to IntruHeapData; C = functor that compares two const
+  // refs and returns true if the first precedes the second
+  template<typename T, typename I, typename C>
+  class IntruHeap {
+
+    static_assert(
+      std::is_same<IntruHeapData&,typename std::result_of<I(T&)>::type>::value,
+      "class I must define operator() to take T& and return a IntruHeapData&.");
+
+    static_assert(
+      std::is_same<bool,typename std::result_of<C(const T&,const T&)>::type>::value,
+      "class C must define operator() to take two const T& and return a bool.");
+
+
+  protected:
+    using index_t = IntruHeapData;
+    // elements in heap order; count is kept equal to data.size()
+    std::vector<T> data;
+    index_t count;
+    I intru_data_of;
+    C comparator;
+
+  public:
+
+    IntruHeap() :
+      count(0)
+    {
+      // empty
+    }
+    // copies the elements and both functors (which may carry state)
+    IntruHeap(const IntruHeap<T,I,C>& other) :
+      data(other.data),
+      count(other.count),
+      intru_data_of(other.intru_data_of),
+      comparator(other.comparator)
+    {
+    }
+
+    bool empty() const { return 0 == count; }
+    // precondition: the heap is not empty
+    T& top() { return data[0]; }
+    // records item's position in item itself, then stores it
+    void push(T&& item) {
+      intru_data_of(item) = count;
+      // item is a named rvalue reference: move it explicitly
+      data.emplace_back(std::move(item));
+      sift_up(count++);
+    }
+
+    void push(const T& item) {
+      T copy(item);
+      push(std::move(copy));
+    }
+    // precondition: the heap is not empty
+    void pop() {
+      std::swap(data[0], data[--count]);
+      intru_data_of(data[0]) = 0;
+      data.pop_back();
+      sift_down(0);
+    }
+    // adjust_* restore heap order from the index stored in item
+    void adjust_up(T& item) {
+      sift_up(intru_data_of(item));
+    }
+
+    void adjust_down(T& item) {
+      sift_down(intru_data_of(item));
+    }
+
+    void adjust(T& item) {
+      sift(intru_data_of(item));
+    }
+
+    friend std::ostream& operator<<(std::ostream& out, const IntruHeap& h) {
+      for (index_t i = 0; i < h.count; ++i) {
+        out << h.data[i] << ", ";
+      }
+      return out;
+    }
+    // pops a copy of the heap, printing the elements accepted by filter
+    std::ostream&
+    display_sorted(std::ostream& out,
+                   bool insert_line_breaks = true,
+                   std::function<bool(const T&)> filter = all_filter) const {
+      IntruHeap<T,I,C> copy = *this;
+
+      bool first = true;
+      out << "[ ";
+
+      while(!copy.empty()) {
+        const T& top = copy.top();
+        if (filter(top)) {
+          if (!first) {
+            out << ", ";
+          }
+          if (insert_line_breaks) {
+            out << std::endl << "    ";
+          }
+          out << copy.top();
+          first = false;
+        }
+        copy.pop();
+      }
+
+      out << " ]";
+      if (insert_line_breaks) {
+        out << std::endl;
+      }
+
+      return out;
+    }
+
+
+  protected:
+
+    // default value of filter parameter to display_sorted
+    static bool all_filter(const T&) { return true; }
+
+    // index_t is unsigned; the root (i == 0) has no parent
+    static inline index_t parent(index_t i) {
+      assert(0 != i);
+      return (i - 1) / 2;
+    }
+
+    static inline index_t lhs(index_t i) { return 2*i + 1; }
+
+    static inline index_t rhs(index_t i) { return 2*i + 2; }
+
+    void sift_up(index_t i) {
+      while (i > 0) {
+        index_t pi = parent(i);
+        if (!comparator(data[i], data[pi])) {
+          break;
+        }
+
+        std::swap(data[i], data[pi]);
+        intru_data_of(data[i]) = i;
+        intru_data_of(data[pi]) = pi;
+        i = pi;
+      }
+    } // sift_up
+
+    void sift_down(index_t i) {
+      while (i < count) {
+        index_t li = lhs(i);
+        index_t ri = rhs(i);
+
+        if (li < count) {
+          if (comparator(data[li], data[i])) {
+            if (ri < count && comparator(data[ri], data[li])) {
+              std::swap(data[i], data[ri]);
+              intru_data_of(data[i]) = i;
+              intru_data_of(data[ri]) = ri;
+              i = ri;
+            } else {
+              std::swap(data[i], data[li]);
+              intru_data_of(data[i]) = i;
+              intru_data_of(data[li]) = li;
+              i = li;
+            }
+          } else if (ri < count && comparator(data[ri], data[i])) {
+            std::swap(data[i], data[ri]);
+            intru_data_of(data[i]) = i;
+            intru_data_of(data[ri]) = ri;
+            i = ri;
+          } else {
+            break;
+          }
+        } else {
+          break;
+        }
+      }
+    } // sift_down
+
+    void sift(index_t i) {
+      if (i == 0) {
+        // if we're at top, can only go down
+        sift_down(i);
+      } else {
+        index_t pi = parent(i);
+        if (comparator(data[i], data[pi])) {
+          // if we can go up, we will
+          sift_up(i);
+        } else {
+          // otherwise we'll try to go down
+          sift_down(i);
+        }
+      }
+    } // sift
+  }; // class IntruHeap
+} // namespace crimson
diff --git a/support/src/profile.h b/support/src/profile.h
new file mode 100644
index 00000000000..77493c75be5
--- /dev/null
+++ b/support/src/profile.h
@@ -0,0 +1,114 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#pragma once
+
+
+#include <cmath>
+#include <chrono>
+
+
+namespace crimson {
+  template<typename T>
+  class ProfileBase {
+  // T is the std::chrono duration type whose rep holds the statistics
+  protected:
+
+    using clock = std::chrono::steady_clock;
+    // sample count, running sum and sum of squares, and the extremes
+    unsigned int count = 0;
+    typename T::rep sum = 0;
+    typename T::rep sum_squares = 0;
+    typename T::rep low = 0;
+    typename T::rep high = 0;
+
+  public:
+
+    unsigned int get_count() const { return count; }
+    typename T::rep get_sum() const { return sum; }
+    typename T::rep get_low() const { return low; }
+    typename T::rep get_high() const { return high; }
+    double get_mean() const {
+      if (0 == count) return std::nan("");
+      return sum / double(count); }
+    double get_std_dev() const {
+      if (0 == count) return std::nan("");
+      const double n = count, s = double(sum); // doubles: int products overflow
+      const double variance = (n * double(sum_squares) - s * s) / (n * n);
+      return std::sqrt(variance > 0.0 ? variance : 0.0); // clamp rounding noise
+    }
+  }; // class ProfileBase
+
+
+  // forward declaration for friend
+  template<typename T>
+  class ProfileCombiner;
+
+
+  template<typename T>
+  class ProfileTimer : public ProfileBase<T> {
+    friend ProfileCombiner<T>;
+    using super = ProfileBase<T>;
+
+    bool is_timing = false;
+    typename super::clock::time_point start_time;
+
+  public:
+
+    ProfileTimer() {}
+
+    // begins one measurement; must not already be timing
+    void start() {
+      assert(!is_timing);
+      start_time = super::clock::now();
+      is_timing = true;
+    }
+
+    // ends the measurement and folds the elapsed time into the statistics
+    void stop() {
+      assert(is_timing);
+      const typename T::rep elapsed =
+        std::chrono::duration_cast<T>(super::clock::now() - start_time).count();
+      if (0 == this->count) {
+        this->low = this->high = elapsed;
+      } else if (elapsed < this->low) {
+        this->low = elapsed;
+      } else if (elapsed > this->high) {
+        this->high = elapsed;
+      }
+      this->sum += elapsed;
+      this->sum_squares += elapsed * elapsed;
+      ++this->count;
+      is_timing = false;
+    }
+  };  // class ProfileTimer
+
+
+  template<typename T>
+  class ProfileCombiner : public ProfileBase<T> {
+
+    using super = ProfileBase<T>;
+
+  public:
+
+    ProfileCombiner() {}
+    // Merges the samples recorded by timer into this combiner: the
+    // counts, sums and sums of squares are added and the low/high
+    // extremes are widened as needed.
+    void combine(const ProfileTimer<T>& timer) {
+      // an empty timer still has its initial low/high of 0; skip it
+      if (0 == timer.count) return;
+      // both bounds are checked: a single timer can extend each of them
+      if (0 == this->count || timer.low < this->low) this->low = timer.low;
+      if (0 == this->count || timer.high > this->high) this->high = timer.high;
+      this->count += timer.count;
+      this->sum += timer.sum;
+      this->sum_squares += timer.sum_squares;
+    }
+  }; // class ProfileCombiner
+} // namespace crimson
diff --git a/support/src/run_every.cc b/support/src/run_every.cc
new file mode 100644
index 00000000000..258baaa74c0
--- /dev/null
+++ b/support/src/run_every.cc
@@ -0,0 +1,73 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#include <iostream>
+
+#include "run_every.h"
+
+
+// can define ADD_MOVE_SEMANTICS, although not fully debugged and tested
+
+
+namespace chrono = std::chrono;
+
+
+#ifdef ADD_MOVE_SEMANTICS
+// Default construction (ADD_MOVE_SEMANTICS builds only) starts no
+// thread; presumably the object is meant to be move-assigned into
+// afterwards -- TODO confirm the intended use.
+crimson::RunEvery::RunEvery() = default;
+
+// move assignment: stops this thread, adopts other's period and body, restarts
+crimson::RunEvery& crimson::RunEvery::operator=(crimson::RunEvery&& other)
+{
+  // finish run every thread
+  {
+    Guard g(mtx);
+    finishing = true;
+    cv.notify_one();
+  }
+  if (thd.joinable()) {
+    thd.join();
+  }
+  // NOTE(review): .store() needs an atomic member; check run_every.h
+  // transfer info over from previous thread
+  finishing.store(other.finishing);
+  wait_period = other.wait_period;
+  body = other.body;
+  // NOTE(review): other's thread is signalled without other.mtx, not joined
+  // finish other thread
+  other.finishing.store(true);
+  other.cv.notify_one();
+
+  // start this thread
+  thd = std::thread(&RunEvery::run, this);
+
+  return *this;
+}
+#endif
+
+// asks the worker thread to stop and waits until it has exited
+crimson::RunEvery::~RunEvery() {
+  { Guard g(mtx); finishing = true; } // under mtx so run() cannot miss it
+  cv.notify_all();
+  if (thd.joinable()) thd.join(); // no thread exists if default-constructed
+}
+
+// thread body: calls body() once per wait_period until finishing is set
+void crimson::RunEvery::run() {
+  Lock l(mtx); // held for the whole loop except while blocked in wait_until
+  while(!finishing) {
+    TimePoint until = chrono::steady_clock::now() + wait_period;
+    while (!finishing && chrono::steady_clock::now() < until) { // re-check after early wake-ups
+      cv.wait_until(l, until);
+    }
+    if (!finishing) {
+      body(); // invoked while mtx is still locked
+    }
+  }
+}
diff --git a/support/src/run_every.h b/support/src/run_every.h
new file mode 100644
index 00000000000..c3499da91ef
--- /dev/null
+++ b/support/src/run_every.h
@@ -0,0 +1,68 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#pragma once
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <functional>
+#include <mutex>
+#include <thread>
+
+namespace crimson {
+  using std::chrono::duration_cast;
+  using std::chrono::milliseconds;
+
+  // Runs a given simple function object on a background thread,
+  // waiting wait_period (stored in milliseconds) between invocations;
+  // the destructor signals that thread to stop and joins it.
+  class RunEvery {
+    using Lock      = std::unique_lock<std::mutex>;
+    using Guard     = std::lock_guard<std::mutex>;
+    using TimePoint = std::chrono::steady_clock::time_point;
+    // stop request for run(); atomic, as run_every.cc calls .store() on it
+    std::atomic<bool>         finishing{false};
+    std::chrono::milliseconds wait_period;
+    std::function<void()>     body;
+    std::mutex                mtx;
+    std::condition_variable   cv;
+
+    // put threads last so all other variables are initialized first
+
+    std::thread               thd;
+
+  public:
+
+#ifdef ADD_MOVE_SEMANTICS
+    RunEvery();
+#endif
+    // starts the thread at once; D is converted to milliseconds
+    template<typename D>
+    RunEvery(D                     _wait_period,
+             std::function<void()> _body) :
+      wait_period(duration_cast<milliseconds>(_wait_period)),
+      body(_body)
+    {
+      thd = std::thread(&RunEvery::run, this);
+    }
+    // not copyable or move-constructible: the thread is bound to `this`
+    RunEvery(const RunEvery& other) = delete;
+    RunEvery& operator=(const RunEvery& other) = delete;
+    RunEvery(RunEvery&& other) = delete;
+#ifdef ADD_MOVE_SEMANTICS
+    RunEvery& operator=(RunEvery&& other);
+#else
+    RunEvery& operator=(RunEvery&& other) = delete;
+#endif
+
+    ~RunEvery();
+
+  protected:
+    // loop executed by thd
+    void run();
+  };
+}
diff --git a/support/test/CMakeLists.txt b/support/test/CMakeLists.txt
new file mode 100644
index 00000000000..addea6c96a9
--- /dev/null
+++ b/support/test/CMakeLists.txt
@@ -0,0 +1,29 @@
+include_directories(../src)
+# compile flags applied to the test sources listed below
+set(local_flags "-Wall -pthread")
+
+# dmclock does not use intrusive heap (but it does use indirect
+# intrusive heap), so we won't use this code
+if(false)
+  set(srcs
+    test_intrusive_heap.cc)
+  add_executable(test_intru_heap test_intrusive_heap.cc)
+  set_source_files_properties(${srcs}
+    PROPERTIES
+    COMPILE_FLAGS "${local_flags}")
+endif(false)
+# sources of the indirect intrusive heap test executable
+set(test_srcs test_indirect_intrusive_heap.cc)
+
+set_source_files_properties(${test_srcs}
+  PROPERTIES
+  COMPILE_FLAGS "${local_flags}"
+  )
+# one executable holds all of the data structure tests
+add_executable(dmclock-data-struct-tests ${test_srcs})
+
+target_link_libraries(dmclock-data-struct-tests
+  LINK_PRIVATE gtest gtest_main pthread)
+# register the executable with CTest
+add_test(NAME dmclock-data-struct-tests
+  COMMAND $<TARGET_FILE:dmclock-data-struct-tests>)
diff --git a/support/test/test_ind_intru_heap.cc b/support/test/test_ind_intru_heap.cc
new file mode 100644
index 00000000000..9ec03b5cacf
--- /dev/null
+++ b/support/test/test_ind_intru_heap.cc
@@ -0,0 +1,82 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#include <memory>
+#include <string>
+#include <iostream>
+
+#include "indirect_intrusive_heap.h"
+
+
+class TestCompare;
+
+
+class Test1 {
+    friend TestCompare;
+
+    // the key the heap is ordered by; TestCompare reads it directly
+    int data;
+public:
+    // member handed to IndIntruHeap as this element's heap index slot
+    crimson::IndIntruHeapData heap_data;
+
+    Test1(int initial) : data(initial) {}
+
+    // prints the key followed by heap_data in parentheses
+    friend std::ostream& operator<<(std::ostream& os, const Test1& elem) {
+        return os << elem.data << " (" << elem.heap_data << ")";
+    }
+    // mutable access so the key can be changed while in a heap
+    int& the_data() { return data; }
+};
+
+// orders Test1 by key, smallest first; const so a const comparator works
+struct TestCompare {
+    bool operator()(const Test1& d1, const Test1& d2) const {
+        return d1.data < d2.data;
+    }
+};
+
+
+int main(int argc, char** argv) {
+    // manual smoke test: builds a small heap, re-keys its top element and
+    // prints the heap after every step so the order can be checked by eye
+    using Heap = crimson::IndIntruHeap<std::shared_ptr<Test1>, Test1,
+                                       &Test1::heap_data, TestCompare>;
+    Heap my_heap;
+
+    const std::shared_ptr<Test1> d99 = std::make_shared<Test1>(99);
+
+    my_heap.push(std::make_shared<Test1>(2));
+    my_heap.push(d99);
+    my_heap.push(std::make_shared<Test1>(1));
+    my_heap.push(std::make_shared<Test1>(-5));
+    my_heap.push(std::make_shared<Test1>(12));
+    my_heap.push(std::make_shared<Test1>(-12));
+    my_heap.push(std::make_shared<Test1>(-7));
+
+    std::cout << my_heap << std::endl;
+
+    // re-key the top element; adjust() re-sifts it (down, as it is the root)
+    auto& t = my_heap.top();
+    t.the_data() = 17;
+    my_heap.adjust(t);
+
+    std::cout << my_heap << std::endl;
+
+    my_heap.display_sorted(std::cout);
+
+    while (!my_heap.empty()) {
+        auto& top = my_heap.top();
+        std::cout << top << std::endl;
+        my_heap.pop();
+        std::cout << my_heap << std::endl;
+    }
+
+    return 0;
+}
diff --git a/support/test/test_indirect_intrusive_heap.cc b/support/test/test_indirect_intrusive_heap.cc
new file mode 100644
index 00000000000..23863a24ce9
--- /dev/null
+++ b/support/test/test_indirect_intrusive_heap.cc
@@ -0,0 +1,930 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+#include <iostream>
+#include <memory>
+#include <set>
+
+#include "gtest/gtest.h"
+
+#include "indirect_intrusive_heap.h"
+
+
+struct Elem {
+  int data;
+  // one intrusive index slot per heap the element can belong to
+  crimson::IndIntruHeapData heap_data;
+  crimson::IndIntruHeapData heap_data_alt;
+
+  Elem(int _data) : data(_data) { }
+  // compares the payload only; const so const Elems can be compared
+  bool operator==(const Elem& other) const {
+    return data == other.data;
+  }
+
+  friend std::ostream& operator<<(std::ostream& out, const Elem& d) {
+    out << d.data;
+    return out;
+  }
+};
+
+
+// orders elements by ascending data value
+struct ElemCompare {
+  bool operator()(const Elem& lhs, const Elem& rhs) const {
+    return rhs.data > lhs.data;
+  }
+};
+
+
+// first all evens precede all odds, then they're sorted high to low
+struct ElemCompareAlt {
+  // const-qualified, like ElemCompare, so a const comparator is usable
+  bool operator()(const Elem& d1, const Elem& d2) const {
+    const bool even1 = (0 == d1.data % 2);
+    const bool even2 = (0 == d2.data % 2);
+
+    if (even1 != even2) {
+      // mixed parity: the even element goes first
+      return even1;
+    }
+
+    // same parity: the larger value goes first
+    return d1.data > d2.data;
+  }
+};
+
+
+class HeapFixture1: public ::testing::Test {
+
+public:
+  // heap that SetUp() fills with the seven elements declared below
+  crimson::IndIntruHeap<std::shared_ptr<Elem>,
+                        Elem,
+                        &Elem::heap_data,
+                        ElemCompare> heap;
+
+  std::shared_ptr<Elem> data1, data2, data3, data4, data5, data6, data7;
+
+  void SetUp() override {
+    data1 = std::make_shared<Elem>(2);
+    data2 = std::make_shared<Elem>(99);
+    data3 = std::make_shared<Elem>(1);
+    data4 = std::make_shared<Elem>(-5);
+    data5 = std::make_shared<Elem>(12);
+    data6 = std::make_shared<Elem>(-12);
+    data7 = std::make_shared<Elem>(-7);
+
+    heap.push(data1);
+    heap.push(data2);
+    heap.push(data3);
+    heap.push(data4);
+    heap.push(data5);
+    heap.push(data6);
+    heap.push(data7);
+  }
+
+  void TearDown() override {
+    // nothing to do
+  }
+}; // class HeapFixture1
+
+TEST(IndIntruHeap, shared_ptr) {
+  // heap with the default K whose elements are held by shared_ptr
+  using Heap = crimson::IndIntruHeap<std::shared_ptr<Elem>, Elem,
+                                     &Elem::heap_data, ElemCompare>;
+  Heap pq;
+
+  EXPECT_TRUE(pq.empty());
+
+  pq.push(std::make_shared<Elem>(2));
+
+  EXPECT_FALSE(pq.empty());
+
+  pq.push(std::make_shared<Elem>(99));
+  pq.push(std::make_shared<Elem>(1));
+  pq.push(std::make_shared<Elem>(-5));
+  pq.push(std::make_shared<Elem>(12));
+  pq.push(std::make_shared<Elem>(-12));
+  pq.push(std::make_shared<Elem>(-7));
+
+  // std::cout << pq << std::endl;
+
+  EXPECT_FALSE(pq.empty());
+  // successive tops must appear in ascending order of data
+  EXPECT_EQ(-12, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(-7, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(-5, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(1, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(2, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(12, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(99, pq.top().data);
+  // 99 is the last element; popping it must leave the heap empty
+  EXPECT_FALSE(pq.empty());
+  pq.pop();
+  EXPECT_TRUE(pq.empty());
+}
+
+
+TEST(IndIntruHeap, unique_ptr) {
+  // heap with the default K whose elements are owned through unique_ptr
+  using Heap = crimson::IndIntruHeap<std::unique_ptr<Elem>, Elem,
+                                     &Elem::heap_data, ElemCompare>;
+  Heap pq;
+
+  EXPECT_TRUE(pq.empty());
+
+  pq.push(std::unique_ptr<Elem>(new Elem(2)));
+
+  EXPECT_FALSE(pq.empty());
+
+  pq.push(std::unique_ptr<Elem>(new Elem(99)));
+  pq.push(std::unique_ptr<Elem>(new Elem(1)));
+  pq.push(std::unique_ptr<Elem>(new Elem(-5)));
+  pq.push(std::unique_ptr<Elem>(new Elem(12)));
+  pq.push(std::unique_ptr<Elem>(new Elem(-12)));
+  pq.push(std::unique_ptr<Elem>(new Elem(-7)));
+
+  EXPECT_FALSE(pq.empty());
+  // successive tops must appear in ascending order of data
+  EXPECT_EQ(-12, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(-7, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(-5, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(1, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(2, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(12, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(99, pq.top().data);
+  // 99 is the last element; popping it must leave the heap empty
+  EXPECT_FALSE(pq.empty());
+  pq.pop();
+  EXPECT_TRUE(pq.empty());
+}
+
+
+TEST(IndIntruHeap, regular_ptr) {
+  crimson::IndIntruHeap<Elem*, Elem, &Elem::heap_data, ElemCompare> heap;
+
+  EXPECT_TRUE(heap.empty());
+
+  heap.push(new Elem(2));
+
+  EXPECT_FALSE(heap.empty());
+
+  heap.push(new Elem(99));
+  heap.push(new Elem(1));
+  heap.push(new Elem(-5));
+  heap.push(new Elem(12));
+  heap.push(new Elem(-12));
+  heap.push(new Elem(-7));
+
+  EXPECT_FALSE(heap.empty());
+
+  // The heap stores raw pointers, so the test frees each element itself.
+  // An element is deleted only after pop() has returned, because the
+  // heap still holds (and may use) its pointer while pop() runs.
+  auto pop_and_delete = [&heap]() {
+    Elem* const victim = &heap.top();
+    heap.pop();
+    delete victim;
+  };
+
+  EXPECT_EQ(-12, heap.top().data);
+  pop_and_delete();
+  EXPECT_EQ(-7, heap.top().data);
+  pop_and_delete();
+  EXPECT_EQ(-5, heap.top().data);
+  pop_and_delete();
+  EXPECT_EQ(1, heap.top().data);
+  pop_and_delete();
+  EXPECT_EQ(2, heap.top().data);
+  pop_and_delete();
+  EXPECT_EQ(12, heap.top().data);
+  pop_and_delete();
+  EXPECT_EQ(99, heap.top().data);
+  EXPECT_FALSE(heap.empty());
+  pop_and_delete();
+  EXPECT_TRUE(heap.empty());
+}
+
+
+TEST(IndIntruHeap, K_3) {
+  // heap instantiated with K == 3; values must still pop in ascending order
+  using Heap = crimson::IndIntruHeap<std::shared_ptr<Elem>, Elem,
+                                     &Elem::heap_data, ElemCompare,
+                                     3>;
+  Heap pq;
+
+  EXPECT_TRUE(pq.empty());
+
+  pq.push(std::make_shared<Elem>(2));
+
+  EXPECT_FALSE(pq.empty());
+
+  pq.push(std::make_shared<Elem>(99));
+  pq.push(std::make_shared<Elem>(1));
+  pq.push(std::make_shared<Elem>(-5));
+  pq.push(std::make_shared<Elem>(12));
+  pq.push(std::make_shared<Elem>(-12));
+  pq.push(std::make_shared<Elem>(-7));
+
+  // std::cout << pq << std::endl;
+
+  EXPECT_FALSE(pq.empty());
+  // successive tops must appear in ascending order of data
+  EXPECT_EQ(-12, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(-7, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(-5, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(1, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(2, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(12, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(99, pq.top().data);
+  // 99 is the last element; popping it must leave the heap empty
+  EXPECT_FALSE(pq.empty());
+  pq.pop();
+  EXPECT_TRUE(pq.empty());
+}
+
+
+TEST(IndIntruHeap, K_4) {
+  // heap instantiated with K == 4; values must still pop in ascending order
+  using Heap = crimson::IndIntruHeap<std::shared_ptr<Elem>, Elem,
+                                     &Elem::heap_data, ElemCompare,
+                                     4>;
+  Heap pq;
+
+  EXPECT_TRUE(pq.empty());
+
+  pq.push(std::make_shared<Elem>(2));
+
+  EXPECT_FALSE(pq.empty());
+
+  pq.push(std::make_shared<Elem>(99));
+  pq.push(std::make_shared<Elem>(1));
+  pq.push(std::make_shared<Elem>(-5));
+  pq.push(std::make_shared<Elem>(12));
+  pq.push(std::make_shared<Elem>(-12));
+  pq.push(std::make_shared<Elem>(-7));
+
+  // std::cout << pq << std::endl;
+
+  EXPECT_FALSE(pq.empty());
+  // successive tops must appear in ascending order of data
+  EXPECT_EQ(-12, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(-7, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(-5, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(1, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(2, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(12, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(99, pq.top().data);
+  // 99 is the last element; popping it must leave the heap empty
+  EXPECT_FALSE(pq.empty());
+  pq.pop();
+  EXPECT_TRUE(pq.empty());
+}
+
+
+TEST(IndIntruHeap, K_10) {
+  // heap instantiated with K == 10; values must still pop in ascending order
+  using Heap = crimson::IndIntruHeap<std::shared_ptr<Elem>, Elem,
+                                     &Elem::heap_data, ElemCompare,
+                                     10>;
+  Heap pq;
+
+  EXPECT_TRUE(pq.empty());
+
+  pq.push(std::make_shared<Elem>(2));
+
+  EXPECT_FALSE(pq.empty());
+
+  pq.push(std::make_shared<Elem>(99));
+  pq.push(std::make_shared<Elem>(1));
+  pq.push(std::make_shared<Elem>(-5));
+  pq.push(std::make_shared<Elem>(12));
+  pq.push(std::make_shared<Elem>(-12));
+  pq.push(std::make_shared<Elem>(-7));
+
+  // std::cout << pq << std::endl;
+
+  EXPECT_FALSE(pq.empty());
+  // successive tops must appear in ascending order of data
+  EXPECT_EQ(-12, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(-7, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(-5, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(1, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(2, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(12, pq.top().data);
+  pq.pop();
+  EXPECT_EQ(99, pq.top().data);
+  // 99 is the last element; popping it must leave the heap empty
+  EXPECT_FALSE(pq.empty());
+  pq.pop();
+  EXPECT_TRUE(pq.empty());
+}
+
+
+TEST(IndIntruHeap, multi_K) {
+  crimson::IndIntruHeap<std::shared_ptr<Elem>,
+                        Elem,
+                        &Elem::heap_data,
+                        ElemCompare,
+                        2> heap2;
+
+  crimson::IndIntruHeap<std::shared_ptr<Elem>,
+                        Elem,
+                        &Elem::heap_data,
+                        ElemCompare,
+                        3> heap3;
+
+  crimson::IndIntruHeap<std::shared_ptr<Elem>,
+                        Elem,
+                        &Elem::heap_data,
+                        ElemCompare,
+                        4> heap4;
+
+  crimson::IndIntruHeap<std::shared_ptr<Elem>,
+                        Elem,
+                        &Elem::heap_data,
+                        ElemCompare,
+                        10> heap10;
+
+  // 250 should give us at least 4 levels on all heaps
+  constexpr size_t count = 250;
+
+  std::srand(std::time(0)); // use current time as seed for random generator
+  // insert the same random values into all four heaps; NOTE(review):
+  // they share Elem::heap_data, so each heap overwrites that index
+  for (size_t i = 0; i < count; ++i) {
+    int value = std::rand() % 201 - 100; // -100...+100
+    auto data = std::make_shared<Elem>(value);
+    heap2.push(data);
+    heap3.push(data);
+    heap4.push(data);
+    heap10.push(data);
+  }
+  // smallest possible value, so the first top always satisfies the bound
+  auto bound = std::numeric_limits<decltype(Elem::data)>::min();
+  // heap2 is the reference; the other heaps must agree with it each round
+  for (size_t i = 0; i < count; ++i) {
+    auto current = heap2.top().data;
+
+    EXPECT_GE(current, bound) <<
+      "we should never go down, only increase or remain the same";
+    EXPECT_EQ(current, heap3.top().data) <<
+      "heap2's data and heap3's data should match";
+    EXPECT_EQ(current, heap4.top().data) <<
+      "heap2's data and heap4's data should match";
+    EXPECT_EQ(current, heap10.top().data) <<
+      "heap2's data and heap10's data should match";
+
+    heap2.pop();
+    heap3.pop();
+    heap4.pop();
+    heap10.pop();
+
+    bound = current;
+  }
+
+  EXPECT_TRUE(heap2.empty()) << "should be empty after all elements popped";
+  EXPECT_TRUE(heap3.empty()) << "should be empty after all elements popped";
+  EXPECT_TRUE(heap4.empty()) << "should be empty after all elements popped";
+  EXPECT_TRUE(heap10.empty()) << "should be empty after all elements popped";
+}
+
+
+TEST(IndIntruHeap, demote) {
+  // raising the key of the top element and demoting it must sink it
+  using Heap = crimson::IndIntruHeap<std::unique_ptr<Elem>, Elem,
+                                     &Elem::heap_data, ElemCompare>;
+  Heap pq;
+
+  pq.push(std::unique_ptr<Elem>(new Elem(2)));
+  pq.push(std::unique_ptr<Elem>(new Elem(99)));
+  pq.push(std::unique_ptr<Elem>(new Elem(1)));
+  pq.push(std::unique_ptr<Elem>(new Elem(-5)));
+  pq.push(std::unique_ptr<Elem>(new Elem(12)));
+  pq.push(std::unique_ptr<Elem>(new Elem(-12)));
+  pq.push(std::unique_ptr<Elem>(new Elem(-7)));
+  // the top (-12) becomes 24, which only 99 exceeds
+  pq.top().data = 24;
+
+  pq.demote(pq.top());
+
+  EXPECT_EQ(-7, pq.top().data);
+
+  // discard the five elements that precede 24:
+  // -7, -5, 1, 2 and 12
+  for (int popped = 0; popped < 5; ++popped) {
+    pq.pop();
+  }
+
+  EXPECT_EQ(24, pq.top().data);
+}
+
+
+TEST(IndIntruHeap, demote_not) {
+  // demoting an element whose key got smaller must leave it on top
+  using Heap = crimson::IndIntruHeap<std::unique_ptr<Elem>, Elem,
+                                     &Elem::heap_data, ElemCompare>;
+  Heap pq;
+
+  pq.push(std::unique_ptr<Elem>(new Elem(2)));
+  pq.push(std::unique_ptr<Elem>(new Elem(99)));
+  pq.push(std::unique_ptr<Elem>(new Elem(1)));
+  pq.push(std::unique_ptr<Elem>(new Elem(-5)));
+  pq.push(std::unique_ptr<Elem>(new Elem(12)));
+  pq.push(std::unique_ptr<Elem>(new Elem(-12)));
+  pq.push(std::unique_ptr<Elem>(new Elem(-7)));
+  // the top (-12) becomes -99, still the smallest value
+  pq.top().data = -99;
+
+  pq.demote(pq.top());
+
+  EXPECT_EQ(-99, pq.top().data);
+
+  pq.pop();
+
+  EXPECT_EQ(-7, pq.top().data);
+}
+
+
+TEST(IndIntruHeap, promote_and_demote) {
+  // one tracked element is re-keyed and moved with promote()/demote()
+  using Heap = crimson::IndIntruHeap<std::shared_ptr<Elem>, Elem,
+                                     &Elem::heap_data, ElemCompare>;
+  Heap pq;
+
+  auto tracked = std::make_shared<Elem>(1);
+
+  pq.push(std::make_shared<Elem>(2));
+  pq.push(std::make_shared<Elem>(99));
+  pq.push(tracked);
+  pq.push(std::make_shared<Elem>(-5));
+  pq.push(std::make_shared<Elem>(12));
+  pq.push(std::make_shared<Elem>(-12));
+  pq.push(std::make_shared<Elem>(-7));
+
+  EXPECT_EQ(-12, pq.top().data);
+  // smaller than every other key: must rise to the top
+  tracked->data = -99;
+  pq.promote(*tracked);
+
+  EXPECT_EQ(-99, pq.top().data);
+  // larger than every other key: must sink and give the top back to -12
+  tracked->data = 999;
+  pq.demote(*tracked);
+
+  EXPECT_EQ(-12, pq.top().data);
+  // 9 belongs between 2 and 12
+  tracked->data = 9;
+  pq.promote(*tracked);
+
+  pq.pop(); // remove -12
+  pq.pop(); // remove -7
+  pq.pop(); // remove -5
+  pq.pop(); // remove 2
+
+  EXPECT_EQ(9, pq.top().data);
+}
+
+
+TEST(IndIntruHeap, adjust) {
+  // adjust() must move a re-keyed element in whichever direction is needed
+  using Heap = crimson::IndIntruHeap<std::shared_ptr<Elem>, Elem,
+                                     &Elem::heap_data, ElemCompare>;
+  Heap pq;
+
+  auto tracked = std::make_shared<Elem>(1);
+
+  pq.push(std::make_shared<Elem>(2));
+  pq.push(std::make_shared<Elem>(99));
+  pq.push(tracked);
+  pq.push(std::make_shared<Elem>(-5));
+  pq.push(std::make_shared<Elem>(12));
+  pq.push(std::make_shared<Elem>(-12));
+  pq.push(std::make_shared<Elem>(-7));
+
+  // pq.display_sorted(std::cout);
+
+  EXPECT_EQ(-12, pq.top().data);
+  // larger than every other key: the top must not change
+  tracked->data = 999;
+  pq.adjust(*tracked);
+
+  EXPECT_EQ(-12, pq.top().data);
+  // smaller than every other key: must rise to the top
+  tracked->data = -99;
+  pq.adjust(*tracked);
+
+  EXPECT_EQ(-99, pq.top().data);
+  // 9 belongs between 2 and 12, so -12 is the top again
+  tracked->data = 9;
+  pq.adjust(*tracked);
+
+  EXPECT_EQ(-12, pq.top().data);
+
+  pq.pop(); // remove -12
+  pq.pop(); // remove -7
+  pq.pop(); // remove -5
+  pq.pop(); // remove 2
+
+  EXPECT_EQ(9, pq.top().data);
+}
+
+
+TEST(IndIntruHeap, remove_careful) {
+  // Checks for a common mistake in implementing remove: once the last
+  // element of the heap has been moved into the position of the
+  // removed element, it has to be sifted in either direction (sift)
+  // rather than only downwards (sift_down).
+
+  // binary heap, so the layout checked at the end is the one for K == 2
+  using Heap = crimson::IndIntruHeap<std::shared_ptr<Elem>, Elem,
+                                     &Elem::heap_data, ElemCompare,
+                                     2>;
+  Heap pq;
+
+  pq.push(std::make_shared<Elem>(0));
+  pq.push(std::make_shared<Elem>(10));
+  pq.push(std::make_shared<Elem>(100));
+  pq.push(std::make_shared<Elem>(20));
+  pq.push(std::make_shared<Elem>(30));
+  pq.push(std::make_shared<Elem>(200));
+  pq.push(std::make_shared<Elem>(300));
+  pq.push(std::make_shared<Elem>(40));
+
+  auto victim = pq.find(Elem(200));
+  EXPECT_NE(pq.end(), victim) <<
+    "we should have found an element with the value 200, which we'll remove";
+  pq.remove(victim);
+  // walk the heap in iteration order and check the resulting layout
+  auto it = pq.cbegin();
+  EXPECT_EQ(0, it->data);
+  ++it;
+  EXPECT_EQ(10, it->data);
+  ++it;
+  EXPECT_EQ(40, it->data) <<
+    "this needs to be 40 or there's a mistake in implementation";
+  ++it;
+  EXPECT_EQ(20, it->data);
+  ++it;
+  EXPECT_EQ(30, it->data);
+  ++it;
+  EXPECT_EQ(100, it->data) <<
+    "this needs to be 100 or there's a mistake in implementation";
+}
+
+
+TEST_F(HeapFixture1, shared_data) {
+  // second heap over the same elements, using heap_data_alt and ElemCompareAlt
+  crimson::IndIntruHeap<std::shared_ptr<Elem>,Elem,&Elem::heap_data_alt,ElemCompareAlt> alt_heap;
+
+  alt_heap.push(data1);
+  alt_heap.push(data2);
+  alt_heap.push(data3);
+  alt_heap.push(data4);
+  alt_heap.push(data5);
+  alt_heap.push(data6);
+  alt_heap.push(data7);
+  // re-key one shared element and let both heaps restore their order
+  data3->data = 32;
+  heap.adjust(*data3);
+  alt_heap.adjust(*data3);
+  // fixture heap: ascending order
+  EXPECT_EQ(-12, heap.top().data);
+  heap.pop();
+  EXPECT_EQ(-7, heap.top().data);
+  heap.pop();
+  EXPECT_EQ(-5, heap.top().data);
+  heap.pop();
+  EXPECT_EQ(2, heap.top().data);
+  heap.pop();
+  EXPECT_EQ(12, heap.top().data);
+  heap.pop();
+  EXPECT_EQ(32, heap.top().data);
+  heap.pop();
+  EXPECT_EQ(99, heap.top().data);
+  // alternate heap: evens high to low, then odds high to low
+  EXPECT_EQ(32, alt_heap.top().data);
+  alt_heap.pop();
+  EXPECT_EQ(12, alt_heap.top().data);
+  alt_heap.pop();
+  EXPECT_EQ(2, alt_heap.top().data);
+  alt_heap.pop();
+  EXPECT_EQ(-12, alt_heap.top().data);
+  alt_heap.pop();
+  EXPECT_EQ(99, alt_heap.top().data);
+  alt_heap.pop();
+  EXPECT_EQ(-5, alt_heap.top().data);
+  alt_heap.pop();
+  EXPECT_EQ(-7, alt_heap.top().data);
+}
+
+
+TEST_F(HeapFixture1, iterator_basics) {
+  {
+    unsigned int count = 0;
+    for(auto i = heap.begin(); i != heap.end(); ++i) {
+      ++count;
+    }
+
+    EXPECT_EQ(7u, count) << "count should be 7";
+  }
+  // begin() must refer to the smallest element, however it is accessed
+  auto i1 = heap.begin();
+
+  EXPECT_EQ(-12, i1->data) <<
+    "first member with -> operator must be smallest";
+
+  EXPECT_EQ(-12, (*i1).data) <<
+    "first member with * operator must be smallest";
+
+  Elem& e1 = *i1;
+  EXPECT_EQ(-12, e1.data) <<
+    "first member bound to a reference must be smallest";
+
+  {
+    std::set<int> values;
+    values.insert(2);
+    values.insert(99);
+    values.insert(1);
+    values.insert(-5);
+    values.insert(12);
+    values.insert(-12);
+    values.insert(-7);
+    // every value seen is crossed off; none may be left over or unknown
+    for(auto i = heap.begin(); i != heap.end(); ++i) {
+      auto v = *i;
+      EXPECT_NE(values.end(), values.find(v.data)) <<
+        "value in heap must be part of original set";
+      values.erase(v.data);
+    }
+    EXPECT_EQ(0u, values.size()) << "all values must have been seen";
+  }
+}
+
+
+TEST_F(HeapFixture1, const_iterator_basics) {
+  const auto& cheap = heap;
+
+  {
+    unsigned int count = 0;
+    for(auto i = cheap.cbegin(); i != cheap.cend(); ++i) {
+      ++count;
+    }
+
+    EXPECT_EQ(7u, count) << "count should be 7";
+  }
+  // cbegin() must refer to the smallest element, however it is accessed
+  auto i1 = cheap.cbegin();
+
+  EXPECT_EQ(-12, i1->data) <<
+    "first member with -> operator must be smallest";
+
+  EXPECT_EQ(-12, (*i1).data) <<
+    "first member with * operator must be smallest";
+
+  const Elem& e1 = *i1;
+  EXPECT_EQ(-12, e1.data) <<
+    "first member bound to a reference must be smallest";
+
+  {
+    std::set<int> values;
+    values.insert(2);
+    values.insert(99);
+    values.insert(1);
+    values.insert(-5);
+    values.insert(12);
+    values.insert(-12);
+    values.insert(-7);
+    // every value seen is crossed off; none may be left over or unknown
+    for(auto i = cheap.cbegin(); i != cheap.cend(); ++i) {
+      auto v = *i;
+      EXPECT_NE(values.end(), values.find(v.data)) <<
+        "value in heap must be part of original set";
+      values.erase(v.data);
+    }
+    EXPECT_EQ(0u, values.size()) << "all values must have been seen";
+  }
+}
+
+
+TEST_F(HeapFixture1, iterator_find_rfind) {
+  {
+    auto it1 = heap.find(data7);
+    EXPECT_NE(heap.end(), it1) <<
+      "find by indirection for included element should succeed";
+    EXPECT_EQ(-7, it1->data) <<
+      "find by indirection for included element should result in right value";
+    // equal value, but a different object than the one in the heap
+    auto fake_data = std::make_shared<Elem>(-7);
+    auto it2 = heap.find(fake_data);
+    EXPECT_EQ(heap.end(), it2) <<
+      "find by indirection for not included element should fail";
+  }
+
+  {
+    auto it1 = heap.find(Elem(-7));
+    EXPECT_NE(heap.end(), it1) <<
+      "find by value for included element should succeed";
+    EXPECT_EQ(-7, it1->data) <<
+      "find by value for included element should result in right value";
+    // no element with the value 7 was pushed by the fixture
+    auto it2 = heap.find(Elem(7));
+    EXPECT_EQ(heap.end(), it2) <<
+      "find by value for not included element should fail";
+  }
+
+  {
+    auto it1 = heap.rfind(data7);
+    EXPECT_NE(heap.end(), it1) <<
+      "reverse find by indirection for included element should succeed";
+    EXPECT_EQ(-7, it1->data) <<
+      "reverse find by indirection for included element should result "
+      "in right value";
+    // equal value, but a different object than the one in the heap
+    auto fake_data = std::make_shared<Elem>(-7);
+    auto it2 = heap.rfind(fake_data);
+    EXPECT_EQ(heap.end(), it2) <<
+      "reverse find by indirection for not included element should fail";
+  }
+
+  {
+    auto it1 = heap.rfind(Elem(-7));
+    EXPECT_NE(heap.end(), it1) <<
+      "reverse find by value for included element should succeed";
+    EXPECT_EQ(-7, it1->data) <<
+      "reverse find by value for included element should result "
+      "in right value";
+    // no element with the value 7 was pushed by the fixture
+    auto it2 = heap.rfind(Elem(7));
+    EXPECT_EQ(heap.end(), it2) <<
+      "reverse find by value for not included element should fail";
+  }
+}
+
+
+TEST_F(HeapFixture1, const_iterator_find_rfind) {
+  const auto& c_heap = heap;
+  // Same four lookups as iterator_find_rfind, made through a const heap.
+  { // find() by indirection: data7 matches, an equal-valued distinct Elem does not
+    auto it1 = c_heap.find(data7);
+    EXPECT_NE(c_heap.cend(), it1) <<
+      "find by indirection for included element should succeed";
+    EXPECT_EQ(-7, it1->data) <<
+      "find by indirection for included element should result in right value";
+
+    auto fake_data = std::make_shared<Elem>(-7);
+    auto it2 = c_heap.find(fake_data);
+    EXPECT_EQ(c_heap.cend(), it2) <<
+      "find by indirection for not included element should fail";
+  }
+
+  { // find() by value: Elem(-7) is found, Elem(7) is not
+    auto it1 = c_heap.find(Elem(-7));
+    EXPECT_NE(c_heap.cend(), it1) <<
+      "find by value for included element should succeed";
+    EXPECT_EQ(-7, it1->data) <<
+      "find by value for included element should result in right value";
+
+    auto it2 = c_heap.find(Elem(7));
+    EXPECT_EQ(c_heap.cend(), it2) <<
+      "find by value for not included element should fail";
+  }
+
+  { // rfind() by indirection: same expectations as find() by indirection
+    auto it1 = c_heap.rfind(data7);
+    EXPECT_NE(c_heap.cend(), it1) <<
+      "reverse find by indirection for included element should succeed";
+    EXPECT_EQ(-7, it1->data) <<
+      "reverse find by indirection for included element should result "
+      "in right value";
+
+    auto fake_data = std::make_shared<Elem>(-7);
+    auto it2 = c_heap.rfind(fake_data);
+    EXPECT_EQ(c_heap.cend(), it2) <<
+      "reverse find by indirection for not included element should fail";
+  }
+
+  { // rfind() by value: same expectations as find() by value
+    auto it1 = c_heap.rfind(Elem(-7));
+    EXPECT_NE(c_heap.cend(), it1) <<
+      "reverse find by value for included element should succeed";
+    EXPECT_EQ(-7, it1->data) <<
+      "reverse find by value for included element should result "
+      "in right value";
+
+    auto it2 = c_heap.rfind(Elem(7));
+    EXPECT_EQ(c_heap.cend(), it2) <<
+      "reverse find by value for not included element should fail";
+  }
+}
+
+
+TEST_F(HeapFixture1, iterator_remove) {
+  // Look up data7 through its shared pointer.
+  auto victim = heap.find(data7);
+  EXPECT_NE(heap.end(), victim) << "find for included element should succeed";
+
+  // Remove it via the iterator; a second lookup must now come back empty.
+  heap.remove(victim);
+
+  auto after = heap.find(data7);
+  EXPECT_EQ(heap.end(), after) << "find for removed element should fail";
+
+  // A full traversal must not come across the removed value either.
+  auto cursor = heap.begin();
+  while (cursor != heap.end()) {
+    EXPECT_NE(-7, cursor->data) <<
+      "iterating through heap should not find removed value";
+    ++cursor;
+  }
+
+  // Pop six times, checking the top before each pop: the remaining
+  // values are expected in ascending order, with -7 absent.
+  const int expected_order[] = {-12, -5, 1, 2, 12, 99};
+
+  for (const int expected : expected_order) {
+    EXPECT_EQ(expected, heap.top().data);
+    heap.pop();
+  }
+}
+
+
+TEST_F(HeapFixture1, four_tops) { // top()/top_ind() through a non-const and a const heap
+  Elem& top1 = heap.top(); // non-const heap, bound to a mutable reference
+  EXPECT_EQ(-12, top1.data);
+  // non-const heap, bound to a const reference
+  const Elem& top2 = heap.top();
+  EXPECT_EQ(-12, top2.data);
+  // non-const heap, top element as a shared pointer
+  std::shared_ptr<Elem> top3 = heap.top_ind();
+  EXPECT_EQ(-12, top3->data);
+  // same call, stored in a const shared pointer
+  const std::shared_ptr<Elem> top4 = heap.top_ind();
+  EXPECT_EQ(-12, top4->data);
+
+  const auto& c_heap = heap;
+  // const heap, bound to a const reference
+  const Elem& top5 = c_heap.top();
+  EXPECT_EQ(-12, top5.data);
+  // const heap, top element as a shared pointer
+  const std::shared_ptr<Elem> top6 = c_heap.top_ind();
+  EXPECT_EQ(-12, top6->data);
+}
+
+
+TEST_F(HeapFixture1, display_sorted) {
+  // Render the heap with display_sorted() into an in-memory stream.
+  std::stringstream rendered;
+  heap.display_sorted(rendered);
+  std::string text = rendered.str();
+
+  EXPECT_GT(text.length(), 0u);
+
+  // Both -7 and 99 have to show up somewhere in the rendered text.
+  auto pos_neg7 = text.find("-7");
+  auto pos_99 = text.find("99");
+  EXPECT_NE(pos_neg7, std::string::npos);
+  EXPECT_NE(pos_99, std::string::npos);
+
+  // -7 must be printed ahead of 99, i.e. at a lower offset.
+  EXPECT_LT(pos_neg7, pos_99);
+
+#if 0
+  // Debug aid: dump the rendered text.
+  std::cout << text << std::endl;
+#endif
+}
diff --git a/support/test/test_intrusive_heap.cc b/support/test/test_intrusive_heap.cc
new file mode 100644
index 00000000000..a0ad07524e0
--- /dev/null
+++ b/support/test/test_intrusive_heap.cc
@@ -0,0 +1,86 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#include <string>
+#include <iostream>
+
+#include "intrusive_heap.h"
+
+
+struct TestCompare;
+struct TestIntruData;
+
+
+class Test1 { // element type stored in the IntruHeap that main() exercises
+    friend TestCompare;   // reads the private `data` to order elements
+    friend TestIntruData; // hands out the private `heap_data`
+    // Payload value; also what operator<< prints first.
+    int data;
+    crimson::IntruHeapData heap_data; // per-element heap data returned by TestIntruData
+
+public:
+    Test1(int _data) : data(_data) {}
+    // Prints the payload followed by the heap data in parentheses.
+    friend std::ostream& operator<<(std::ostream& out, const Test1& d) {
+        out << d.data << " (" << d.heap_data << ")";
+        return out;
+    }
+    // Mutable access to the payload; main() uses it to change an element in place.
+    int& the_data() { return data; }
+};
+
+
+struct TestCompare { // strict "less than" on Test1's payload
+    bool operator()(const Test1& d1, const Test1& d2) const { // const: callable on a const comparator
+        return d1.data < d2.data;
+    }
+};
+
+
+struct TestIntruData { // maps a Test1 to its embedded IntruHeapData
+    crimson::IntruHeapData& operator()(Test1& d) const { // const: the functor has no state
+        return d.heap_data;
+    }
+};
+
+
+int main() {
+    Test1 d1(2);
+    Test1 d2(3);
+    Test1 d3(1);
+    Test1 d4(-5);
+    // Heap of Test1, compared with TestCompare, heap data reached via TestIntruData.
+    crimson::IntruHeap<Test1, TestIntruData, TestCompare> my_heap;
+
+    my_heap.push(d1);
+    my_heap.push(d2);
+    my_heap.push(d3);
+    my_heap.push(d4);
+    my_heap.push(Test1(-9));
+    my_heap.push(Test1(99));
+    my_heap.push(Test1(0));
+
+    std::cout << my_heap << std::endl;
+    // Overwrite the top element's payload, then call adjust_down() on it.
+    auto& t = my_heap.top();
+    t.the_data() = 17;
+    my_heap.adjust_down(t);
+
+    std::cout << my_heap << std::endl;
+
+    my_heap.display_sorted(std::cout);
+    // Drain the heap, printing each top element and then what remains.
+    while (!my_heap.empty()) {
+        auto& top = my_heap.top();
+        std::cout << top << std::endl;
+        my_heap.pop();
+        std::cout << my_heap << std::endl;
+    }
+
+    return 0;
+}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
new file mode 100644
index 00000000000..e72810b56aa
--- /dev/null
+++ b/test/CMakeLists.txt
@@ -0,0 +1,35 @@
+include_directories(../src)
+include_directories(../support/src)
+include_directories(../sim/src)
+include_directories(${BOOST_INCLUDE_DIR})
+# Sources that make up the dmclock-tests executable.
+set(support_srcs ../sim/src/test_dmclock.cc)
+set(test_srcs
+  test_test_client.cc
+  test_dmclock_server.cc
+  test_dmclock_client.cc
+  )
+# NOTE(review): core_srcs is not set in this file -- presumably inherited from a parent scope; confirm.
+set_source_files_properties(${core_srcs} ${test_srcs}
+  PROPERTIES
+  COMPILE_FLAGS "${local_flags}"
+  )
+
+add_executable(dmclock-tests ${test_srcs} ${support_srcs})
+# Use the gtest/gtest_main targets when both exist, else GTEST_LIBRARY/GTEST_MAIN_LIBRARY.
+if (TARGET gtest AND TARGET gtest_main)
+  add_dependencies(dmclock-tests gtest gtest_main)
+  target_link_libraries(dmclock-tests
+    LINK_PRIVATE $<TARGET_FILE:dmclock>
+    pthread
+    $<TARGET_FILE:gtest>
+    $<TARGET_FILE:gtest_main>)
+else()
+  target_link_libraries(dmclock-tests
+    LINK_PRIVATE $<TARGET_FILE:dmclock> pthread ${GTEST_LIBRARY} ${GTEST_MAIN_LIBRARY})
+endif()
+# The dmclock library is linked by file path above, so order the build explicitly.
+add_dependencies(dmclock-tests dmclock)
+# Register the test executable with CTest.
+add_test(NAME dmclock-tests
+  COMMAND $<TARGET_FILE:dmclock-tests>)
diff --git a/test/test_dmclock_client.cc b/test/test_dmclock_client.cc
new file mode 100644
index 00000000000..ee4172dc348
--- /dev/null
+++ b/test/test_dmclock_client.cc
@@ -0,0 +1,219 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#include <chrono>
+#include <mutex>
+#include <functional>
+#include <iostream>
+
+
+#include "dmclock_client.h"
+#include "dmclock_util.h"
+#include "gtest/gtest.h"
+
+
+namespace dmc = crimson::dmclock;
+
+
+namespace crimson {
+  namespace dmclock {
+
+    /*
+     * Invokes `code` while holding `mtx`; the lock is released on return.
+     */
+    static void test_locked(std::mutex& mtx, std::function<void()> code) {
+      std::lock_guard<std::mutex> l(mtx);
+      code();
+    }
+
+
+    TEST(dmclock_client, server_erase) {
+      using ServerId = int;
+      // using ClientId = int;
+
+      ServerId server = 101;
+      // ClientId client = 3;
+
+      // dmc::PhaseType resp_params = dmc::PhaseType::reservation;
+      // NOTE(review): the 2 s / 3 s arguments are presumably the clean period and erase age used in the timeline below -- confirm.
+      dmc::ServiceTracker<ServerId> st(std::chrono::seconds(2),
+                                       std::chrono::seconds(3));
+      // Every inspection of st.server_map below runs with st.data_mtx held.
+      auto lock_st = [&](std::function<void()> code) {
+	test_locked(st.data_mtx, code);
+      };
+
+      /* The timeline should be as follows:
+       *
+       *     0 seconds : tracker created; map is size 0
+       *
+       *     1 seconds : first request made; map is size 1
+       *
+       * 2 seconds : clean notes first mark; +2 is base for further calcs
+       *
+       * 4 seconds : clean does nothing except makes another mark
+       *
+       *   5 seconds : when we're scheduled to erase (+2 + 3)
+       *
+       *     5 seconds : since the clean job hasn't run yet, map still size 1
+       *
+       * 6 seconds : clean erases server
+       *
+       *     7 seconds : verified server is gone (map size 0)
+       */
+
+      lock_st([&] () {
+	  EXPECT_EQ(0u, st.server_map.size()) <<
+	    "server map initially has size 0";
+	});
+
+      std::this_thread::sleep_for(std::chrono::seconds(1));
+
+      // call for side effects
+      (void) st.get_req_params(server);
+
+      lock_st([&] () {
+	  EXPECT_EQ(1u, st.server_map.size()) <<
+	    "server map has size 1 after first request";
+	});
+      // 1 s + 4 s puts this check at the 5 second mark
+      std::this_thread::sleep_for(std::chrono::seconds(4));
+
+      lock_st([&] () {
+	  EXPECT_EQ(1u, st.server_map.size()) <<
+	    "server map has size 1 just before erase";
+	});
+      // 5 s + 2 s puts this check at the 7 second mark
+      std::this_thread::sleep_for(std::chrono::seconds(2));
+
+      lock_st([&] () {
+	  EXPECT_EQ(0u, st.server_map.size()) <<
+	    "server map has size 0 just after erase";
+	});
+    } // TEST
+
+
+    TEST(dmclock_client, delta_rho_values) {
+      using ServerId = int;
+      // using ClientId = int;
+
+      ServerId server1 = 101;
+      ServerId server2 = 7;
+      // ClientId client = 3;
+      // Checks the delta/rho returned by get_req_params() as responses are tracked.
+      // RespParams<ServerId> resp_params(server, dmc::PhaseType::reservation);
+
+      dmc::ServiceTracker<ServerId> st(std::chrono::seconds(2),
+                                       std::chrono::seconds(3));
+
+      auto rp1 = st.get_req_params(server1);
+
+      EXPECT_EQ(1u, rp1.delta) <<
+	"delta should be 1 with no intervening responses by " <<
+	"other servers";
+      EXPECT_EQ(1u, rp1.rho) <<
+	"rho should be 1 with no intervening reservation responses by " <<
+	"other servers";
+
+      auto rp2 = st.get_req_params(server1);
+
+      EXPECT_EQ(1u, rp2.delta) <<
+	"delta should be 1 with no intervening responses by " <<
+	"other servers";
+      EXPECT_EQ(1u, rp2.rho) <<
+	"rho should be 1 with no intervening reservation responses by " <<
+	"other servers";
+
+      st.track_resp(server1, dmc::PhaseType::priority);
+
+      auto rp3 = st.get_req_params(server1);
+
+      EXPECT_EQ(1u, rp3.delta) <<
+	"delta should be 1 with no intervening responses by " <<
+	"other servers";
+      EXPECT_EQ(1u, rp3.rho) <<
+	"rho should be 1 with no intervening reservation responses by " <<
+	"other servers";
+
+      st.track_resp(server2, dmc::PhaseType::priority);
+
+      auto rp4 = st.get_req_params(server1);
+
+      EXPECT_EQ(2u, rp4.delta) <<
+	"delta should be 2 with one intervening priority response by " <<
+	"another server";
+      EXPECT_EQ(1u, rp4.rho) <<
+	"rho should be 1 with one intervening priority responses by " <<
+	"another server";
+
+      auto rp5 = st.get_req_params(server1);
+
+      EXPECT_EQ(1u, rp5.delta) <<
+	"delta should be 1 with no intervening responses by " <<
+	"other servers";
+      EXPECT_EQ(1u, rp5.rho) <<
+	"rho should be 1 with no intervening reservation responses by " <<
+	"other servers";
+
+      st.track_resp(server2, dmc::PhaseType::reservation);
+
+      auto rp6 = st.get_req_params(server1);
+
+      EXPECT_EQ(2u, rp6.delta) <<
+	"delta should be 2 with one intervening reservation response by " <<
+	"another server";
+      EXPECT_EQ(2u, rp6.rho) <<
+	"rho should be 2 with one intervening reservation responses by " <<
+	"another server";
+
+      // auto rp6_b = st.get_req_params(server2);
+      // Seven responses: four from server2 (two reservation), three from server1 (one reservation).
+      st.track_resp(server2, dmc::PhaseType::reservation);
+      st.track_resp(server1, dmc::PhaseType::priority);
+      st.track_resp(server2, dmc::PhaseType::priority);
+      st.track_resp(server2, dmc::PhaseType::reservation);
+      st.track_resp(server1, dmc::PhaseType::reservation);
+      st.track_resp(server1, dmc::PhaseType::priority);
+      st.track_resp(server2, dmc::PhaseType::priority);
+
+      auto rp7 = st.get_req_params(server1);
+
+      EXPECT_EQ(5u, rp7.delta) <<
+	"delta should be 5 with four intervening responses by " <<
+	"another server";
+      EXPECT_EQ(3u, rp7.rho) <<
+	"rho should be 3 with two intervening reservation responses by " <<
+	"another server";
+
+      auto rp7b = st.get_req_params(server2);
+
+      EXPECT_EQ(4u, rp7b.delta) <<
+	"delta should be 4 with three intervening responses by " <<
+	"another server";
+      EXPECT_EQ(2u, rp7b.rho) <<
+	"rho should be 2 with one intervening reservation responses by " <<
+	"another server";
+
+      auto rp8 = st.get_req_params(server1);
+
+      EXPECT_EQ(1u, rp8.delta) <<
+	"delta should be 1 with no intervening responses by " <<
+	"another server";
+      EXPECT_EQ(1u, rp8.rho) <<
+	"rho should be 1 with no intervening reservation responses by " <<
+	"another server";
+
+      auto rp8b = st.get_req_params(server2);
+      EXPECT_EQ(1u, rp8b.delta) <<
+	"delta should be 1 with no intervening responses by " <<
+	"another server";
+      EXPECT_EQ(1u, rp8b.rho) <<
+	"rho should be 1 with no intervening reservation responses by " <<
+	"another server";
+    } // TEST
+  } // namespace dmclock
+} // namespace crimson
diff --git a/test/test_dmclock_server.cc b/test/test_dmclock_server.cc
new file mode 100644
index 00000000000..4555e377323
--- /dev/null
+++ b/test/test_dmclock_server.cc
@@ -0,0 +1,826 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+
+#include <memory>
+#include <chrono>
+#include <iostream>
+#include <list>
+#include <vector>
+
+
+#include "dmclock_server.h"
+#include "dmclock_util.h"
+#include "gtest/gtest.h"
+
+
+namespace dmc = crimson::dmclock;
+
+
+// Request type used to instantiate the queues under test; it needs no members, so it is empty.
+struct Request {
+};
+
+
+namespace crimson {
+  namespace dmclock {
+
+    /*
+     * Invokes `code` while holding `mtx`; the lock is released on return.
+     */
+    static void test_locked(std::mutex& mtx, std::function<void()> code) {
+      std::lock_guard<std::mutex> l(mtx);
+      code();
+    }
+
+
+    TEST(dmclock_server, bad_tag_deathtest) {
+      using ClientId = int;
+      using Queue = dmc::PullPriorityQueue<ClientId,Request>;
+      using QueueRef = std::unique_ptr<Queue>;
+
+      ClientId client1 = 17;
+      ClientId client2 = 18;
+      // Both clients get reservation 0 and weight 0; only the third ClientInfo argument differs.
+      double reservation = 0.0;
+      double weight = 0.0;
+
+      dmc::ClientInfo ci1(reservation, weight, 0.0);
+      dmc::ClientInfo ci2(reservation, weight, 1.0);
+
+      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo {
+	if (client1 == c) return ci1;
+	else if (client2 == c) return ci2;
+	else {
+	  ADD_FAILURE() << "got request from neither of two clients";
+	  return ci1; // must return
+	}
+      };
+
+      QueueRef pq(new Queue(client_info_f, false));
+      Request req;
+      ReqParams req_params(1,1);
+      // Adding a request for either client must die with output matching the assertion pattern.
+      EXPECT_DEATH_IF_SUPPORTED(pq->add_request(req, client1, req_params),
+				"Assertion.*reservation.*max_tag.*"
+				"proportion.*max_tag") <<
+	"we should fail if a client tries to generate a reservation tag "
+	"where reservation and proportion are both 0";
+
+
+      EXPECT_DEATH_IF_SUPPORTED(pq->add_request(req, client2, req_params),
+				"Assertion.*reservation.*max_tag.*"
+				"proportion.*max_tag") <<
+	"we should fail if a client tries to generate a reservation tag "
+	"where reservation and proportion are both 0";
+    }
+
+
+    TEST(dmclock_server, client_idle_erase) {
+      using ClientId = int;
+      using Queue = dmc::PushPriorityQueue<ClientId,Request>;
+      ClientId client = 17;
+      double reservation = 100.0;
+
+      dmc::ClientInfo ci(reservation, 1.0, 0.0);
+      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo { return ci; };
+      auto server_ready_f = [] () -> bool { return true; };
+      auto submit_req_f = [] (const ClientId& c,
+			      std::unique_ptr<Request> req,
+			      dmc::PhaseType phase) {
+	// empty; do nothing
+      };
+      // NOTE(review): 3 s / 5 s / 2 s presumably are the idle age, erase age and clean period of the timeline below -- confirm.
+      Queue pq(client_info_f,
+	       server_ready_f,
+	       submit_req_f,
+	       std::chrono::seconds(3),
+	       std::chrono::seconds(5),
+	       std::chrono::seconds(2),
+	       false);
+      // Every inspection of pq.client_map below runs with pq.data_mtx held.
+      auto lock_pq = [&](std::function<void()> code) {
+	test_locked(pq.data_mtx, code);
+      };
+
+
+      /* The timeline should be as follows:
+       *
+       *     0 seconds : request created
+       *
+       *     1 seconds : map is size 1, idle is false
+       *
+       * 2 seconds : clean notes first mark; +2 is base for further calcs
+       *
+       * 4 seconds : clean does nothing except makes another mark
+       *
+       *   5 seconds : when we're scheduled to idle (+2 + 3)
+       *
+       * 6 seconds : clean idles client
+       *
+       *   7 seconds : when we're scheduled to erase (+2 + 5)
+       *
+       *     7 seconds : verified client is idle
+       *
+       * 8 seconds : clean erases client info
+       *
+       *     9 seconds : verified client is erased
+       */
+
+      lock_pq([&] () {
+	  EXPECT_EQ(0u, pq.client_map.size()) <<
+	    "client map initially has size 0";
+	});
+
+      Request req;
+      dmc::ReqParams req_params(1, 1);
+      pq.add_request_time(req, client, req_params, dmc::get_time());
+
+      std::this_thread::sleep_for(std::chrono::seconds(1));
+
+      lock_pq([&] () {
+	  EXPECT_EQ(1u, pq.client_map.size()) <<
+	    "client map has 1 after 1 client";
+	  EXPECT_FALSE(pq.client_map.at(client)->idle) <<
+	    "initially client map entry shows not idle.";
+	});
+      // 1 s + 6 s puts this check at the 7 second mark
+      std::this_thread::sleep_for(std::chrono::seconds(6));
+
+      lock_pq([&] () {
+	  EXPECT_TRUE(pq.client_map.at(client)->idle) <<
+	    "after idle age client map entry shows idle.";
+	});
+      // 7 s + 2 s puts this check at the 9 second mark
+      std::this_thread::sleep_for(std::chrono::seconds(2));
+
+      lock_pq([&] () {
+	  EXPECT_EQ(0u, pq.client_map.size()) <<
+	    "client map loses its entry after erase age";
+	});
+    } // TEST
+
+
+#if 0 // NOTE(review): disabled; uses dmc::PriorityQueue and a 3-argument ReqParams, unlike the enabled tests here
+    TEST(dmclock_server, reservation_timing) {
+      using ClientId = int;
+      // NB? PUSH OR PULL
+      using Queue = std::unique_ptr<dmc::PriorityQueue<ClientId,Request>>;
+      using std::chrono::steady_clock;
+
+      int client = 17;
+      // Submission timestamps, appended by submit_req_f under times_mtx.
+      std::vector<dmc::Time> times;
+      std::mutex times_mtx;
+      using Guard = std::lock_guard<decltype(times_mtx)>;
+
+      // reservation every second
+      dmc::ClientInfo ci(1.0, 0.0, 0.0);
+      Queue pq;
+
+      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo { return ci; };
+      auto server_ready_f = [] () -> bool { return true; };
+      auto submit_req_f = [&] (const ClientId& c,
+			       std::unique_ptr<Request> req,
+			       dmc::PhaseType phase) {
+	{
+	  Guard g(times_mtx);
+	  times.emplace_back(dmc::get_time());
+	}
+	std::thread complete([&](){ pq->request_completed(); });
+	complete.detach();
+      };
+
+      // NB? PUSH OR PULL
+      pq = Queue(new dmc::PriorityQueue<ClientId,Request>(client_info_f,
+							  server_ready_f,
+							  submit_req_f,
+							  false));
+
+      Request req;
+      ReqParams<ClientId> req_params(client, 1, 1);
+
+      for (int i = 0; i < 5; ++i) {
+	pq->add_request_time(req, req_params, dmc::get_time());
+      }
+      // NOTE(review): times_mtx is held across the sleep below, and submit_req_f also locks it -- confirm intent.
+      {
+	Guard g(times_mtx);
+	std::this_thread::sleep_for(std::chrono::milliseconds(5500));
+	EXPECT_EQ(5, times.size()) <<
+	  "after 5.5 seconds, we should have 5 requests times at 1 second apart";
+      }
+    } // TEST
+#endif
+
+
+    TEST(dmclock_server, remove_by_req_filter) {
+      // Request type that carries nothing but an integer id.
+      struct MyReq {
+	int id;
+
+	MyReq(int _id) : id(_id) {}
+      }; // MyReq
+
+      using ClientId = int;
+      using Queue = dmc::PullPriorityQueue<ClientId,MyReq>;
+
+      ClientId first_client = 17;
+      ClientId second_client = 98;
+
+      // Every client is handed the same ClientInfo.
+      dmc::ClientInfo shared_info(0.0, 1.0, 0.0);
+      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo {
+	return shared_info;
+      };
+
+      Queue pq(client_info_f, true);
+
+      // A fresh queue knows no clients and holds no requests.
+      EXPECT_EQ(0u, pq.client_count());
+      EXPECT_EQ(0u, pq.request_count());
+
+      ReqParams params(1,1);
+
+      // Nine requests across the two clients: four odd ids, five even ids.
+      pq.add_request(MyReq(1), first_client, params);
+      pq.add_request(MyReq(11), first_client, params);
+      pq.add_request(MyReq(2), second_client, params);
+      pq.add_request(MyReq(0), second_client, params);
+      pq.add_request(MyReq(13), second_client, params);
+      pq.add_request(MyReq(2), second_client, params);
+      pq.add_request(MyReq(13), second_client, params);
+      pq.add_request(MyReq(98), second_client, params);
+      pq.add_request(MyReq(44), first_client, params);
+
+      EXPECT_EQ(2u, pq.client_count());
+      EXPECT_EQ(9u, pq.request_count());
+
+      // First pass: drop the odd ids.
+      auto is_odd = [](const MyReq& r) -> bool { return 1 == r.id % 2; };
+      pq.remove_by_req_filter(is_odd);
+
+      EXPECT_EQ(5u, pq.request_count());
+
+      // Second pass: drop the even ids, keeping a copy of each one.
+      std::list<MyReq> capture;
+      auto take_even = [&capture] (const MyReq& r) -> bool {
+	const bool is_even = (0 == r.id % 2);
+	if (is_even) {
+	  capture.push_front(r);
+	}
+	return is_even;
+      };
+      pq.remove_by_req_filter(take_even, true);
+
+      EXPECT_EQ(0u, pq.request_count());
+      EXPECT_EQ(5u, capture.size());
+      int total = 0;
+      for (const auto& item : capture) {
+	total += item.id;
+      }
+      EXPECT_EQ(146, total) << " sum of captured items should be 146";
+    } // TEST
+
+
+    TEST(dmclock_server, remove_by_req_filter_ordering_forwards_visit) {
+      struct MyReq {
+	int id;
+
+	MyReq(int _id) :
+	  id(_id)
+	{
+	  // empty
+	}
+      }; // MyReq
+
+      using ClientId = int;
+      using Queue = dmc::PullPriorityQueue<ClientId,MyReq>;
+
+      ClientId client1 = 17;
+
+      dmc::ClientInfo info1(0.0, 1.0, 0.0);
+
+      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo {
+	return info1;
+      };
+
+      Queue pq(client_info_f, true);
+
+      EXPECT_EQ(0u, pq.client_count());
+      EXPECT_EQ(0u, pq.request_count());
+
+      ReqParams req_params(1,1);
+      // one client, ids 1..6 added in ascending order
+      pq.add_request(MyReq(1), client1, req_params);
+      pq.add_request(MyReq(2), client1, req_params);
+      pq.add_request(MyReq(3), client1, req_params);
+      pq.add_request(MyReq(4), client1, req_params);
+      pq.add_request(MyReq(5), client1, req_params);
+      pq.add_request(MyReq(6), client1, req_params);
+
+      EXPECT_EQ(1u, pq.client_count());
+      EXPECT_EQ(6u, pq.request_count());
+
+      // remove odd ids in forward order and append to end
+      // (the expectations rely on `false` visiting requests in the order added)
+      std::vector<MyReq> capture;
+      pq.remove_by_req_filter(
+	[&capture] (const MyReq& r) -> bool {
+	  if (1 == r.id % 2) {
+	    capture.push_back(r);
+	    return true;
+	  } else {
+	    return false;
+	  }
+	},
+	false);
+
+      EXPECT_EQ(3u, pq.request_count());
+      EXPECT_EQ(3u, capture.size());
+      EXPECT_EQ(1, capture[0].id) << "items should come out in forward order";
+      EXPECT_EQ(3, capture[1].id) << "items should come out in forward order";
+      EXPECT_EQ(5, capture[2].id) << "items should come out in forward order";
+
+      // remove even ids, again visiting forwards, but insert each at the
+      // front so the captured ids end up in reverse order
+
+      std::vector<MyReq> capture2;
+      pq.remove_by_req_filter(
+	[&capture2] (const MyReq& r) -> bool {
+	  if (0 == r.id % 2) {
+	    capture2.insert(capture2.begin(), r);
+	    return true;
+	  } else {
+	    return false;
+	  }
+	},
+	false);
+
+      EXPECT_EQ(0u, pq.request_count());
+      EXPECT_EQ(3u, capture2.size());
+      EXPECT_EQ(6, capture2[0].id) << "items should come out in reverse order";
+      EXPECT_EQ(4, capture2[1].id) << "items should come out in reverse order";
+      EXPECT_EQ(2, capture2[2].id) << "items should come out in reverse order";
+    } // TEST
+
+
+    TEST(dmclock_server, remove_by_req_filter_ordering_backwards_visit) {
+      struct MyReq {
+	int id;
+
+	MyReq(int _id) :
+	  id(_id)
+	{
+	  // empty
+	}
+      }; // MyReq
+
+      using ClientId = int;
+      using Queue = dmc::PullPriorityQueue<ClientId,MyReq>;
+
+      ClientId client1 = 17;
+
+      dmc::ClientInfo info1(0.0, 1.0, 0.0);
+
+      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo {
+	return info1;
+      };
+
+      Queue pq(client_info_f, true);
+
+      EXPECT_EQ(0u, pq.client_count());
+      EXPECT_EQ(0u, pq.request_count());
+
+      ReqParams req_params(1,1);
+      // one client, ids 1..6 added in ascending order
+      pq.add_request(MyReq(1), client1, req_params);
+      pq.add_request(MyReq(2), client1, req_params);
+      pq.add_request(MyReq(3), client1, req_params);
+      pq.add_request(MyReq(4), client1, req_params);
+      pq.add_request(MyReq(5), client1, req_params);
+      pq.add_request(MyReq(6), client1, req_params);
+
+      EXPECT_EQ(1u, pq.client_count());
+      EXPECT_EQ(6u, pq.request_count());
+
+      // remove odd ids visiting backwards (second argument true), inserting
+      // each at the front so the captured ids end up in forward order
+      std::vector<MyReq> capture;
+      pq.remove_by_req_filter(
+	[&capture] (const MyReq& r) -> bool {
+	  if (1 == r.id % 2) {
+	    capture.insert(capture.begin(), r);
+	    return true;
+	  } else {
+	    return false;
+	  }
+	},
+	true);
+
+      EXPECT_EQ(3u, pq.request_count());
+      EXPECT_EQ(3u, capture.size());
+      EXPECT_EQ(1, capture[0].id) << "items should come out in forward order";
+      EXPECT_EQ(3, capture[1].id) << "items should come out in forward order";
+      EXPECT_EQ(5, capture[2].id) << "items should come out in forward order";
+
+      // remove even ids visiting backwards and append each to the end, so
+      // the captured ids end up in reverse order
+      std::vector<MyReq> capture2;
+      pq.remove_by_req_filter(
+	[&capture2] (const MyReq& r) -> bool {
+	  if (0 == r.id % 2) {
+	    capture2.push_back(r);
+	    return true;
+	  } else {
+	    return false;
+	  }
+	},
+	true);
+
+      EXPECT_EQ(0u, pq.request_count());
+      EXPECT_EQ(3u, capture2.size());
+      EXPECT_EQ(6, capture2[0].id) << "items should come out in reverse order";
+      EXPECT_EQ(4, capture2[1].id) << "items should come out in reverse order";
+      EXPECT_EQ(2, capture2[2].id) << "items should come out in reverse order";
+    } // TEST
+
+
+    TEST(dmclock_server, remove_by_client) {
+      struct MyReq {
+	int id;
+
+	MyReq(int _id) :
+	  id(_id)
+	{
+	  // empty
+	}
+      }; // MyReq
+
+      using ClientId = int;
+      using Queue = dmc::PullPriorityQueue<ClientId,MyReq>;
+
+      ClientId client1 = 17;
+      ClientId client2 = 98;
+
+      dmc::ClientInfo info1(0.0, 1.0, 0.0);
+
+      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo {
+	return info1;
+      };
+
+      Queue pq(client_info_f, true);
+
+      EXPECT_EQ(0u, pq.client_count());
+      EXPECT_EQ(0u, pq.request_count());
+
+      ReqParams req_params(1,1);
+      // three requests for client1 (ids 1, 11, 44) and six for client2
+      pq.add_request(MyReq(1), client1, req_params);
+      pq.add_request(MyReq(11), client1, req_params);
+      pq.add_request(MyReq(2), client2, req_params);
+      pq.add_request(MyReq(0), client2, req_params);
+      pq.add_request(MyReq(13), client2, req_params);
+      pq.add_request(MyReq(2), client2, req_params);
+      pq.add_request(MyReq(13), client2, req_params);
+      pq.add_request(MyReq(98), client2, req_params);
+      pq.add_request(MyReq(44), client1, req_params);
+
+      EXPECT_EQ(2u, pq.client_count());
+      EXPECT_EQ(9u, pq.request_count());
+
+      std::list<MyReq> removed;
+      // each removed request is pushed to the front; the list must then read 1, 11, 44
+      pq.remove_by_client(client1,
+			  true,
+			  [&removed] (const MyReq& r) {
+			    removed.push_front(r);
+			  });
+
+      EXPECT_EQ(3u, removed.size());
+      EXPECT_EQ(1, removed.front().id);
+      removed.pop_front();
+      EXPECT_EQ(11, removed.front().id);
+      removed.pop_front();
+      EXPECT_EQ(44, removed.front().id);
+      removed.pop_front();
+
+      EXPECT_EQ(6u, pq.request_count());
+      // the next two pulls must return client2's first two requests (ids 2, then 0)
+      Queue::PullReq pr = pq.pull_request();
+      EXPECT_TRUE(pr.is_retn());
+      EXPECT_EQ(2, pr.get_retn().request->id);
+
+      pr = pq.pull_request();
+      EXPECT_TRUE(pr.is_retn());
+      EXPECT_EQ(0, pr.get_retn().request->id);
+      // removing client2 with only the client argument must empty the queue
+      pq.remove_by_client(client2);
+      EXPECT_EQ(0u, pq.request_count()) <<
+	"after second client removed, none left";
+    } // TEST
+
+
+    TEST(dmclock_server_pull, pull_weight) {
+      using ClientId = int;
+      using Queue = dmc::PullPriorityQueue<ClientId,Request>;
+      using QueueRef = std::unique_ptr<Queue>;
+
+      ClientId client1 = 17;
+      ClientId client2 = 98;
+      // No reservation for either client; client2's weight is twice client1's.
+      dmc::ClientInfo info1(0.0, 1.0, 0.0);
+      dmc::ClientInfo info2(0.0, 2.0, 0.0);
+
+      QueueRef pq;
+
+      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo {
+	if (client1 == c) return info1;
+	else if (client2 == c) return info2;
+	else {
+	  ADD_FAILURE() << "client info looked up for non-existent client";
+	  return info1;
+	}
+      };
+
+      pq = QueueRef(new Queue(client_info_f, false));
+
+      Request req;
+      ReqParams req_params(1,1);
+
+      // Queue five requests per client. add_request() is given no
+      // timestamp, so no local time value is tracked here.
+      for (int i = 0; i < 5; ++i) {
+	pq->add_request(req, client1, req_params);
+	pq->add_request(req, client2, req_params);
+      }
+
+      // Pull six of the ten requests and count them per client.
+      int c1_count = 0;
+      int c2_count = 0;
+      for (int i = 0; i < 6; ++i) {
+	Queue::PullReq pr = pq->pull_request();
+	EXPECT_EQ(Queue::NextReqType::returning, pr.type);
+	auto& retn = boost::get<Queue::PullReq::Retn>(pr.data);
+
+	if (client1 == retn.client) ++c1_count;
+	else if (client2 == retn.client) ++c2_count;
+	else ADD_FAILURE() << "got request from neither of two clients";
+
+	EXPECT_EQ(PhaseType::priority, retn.phase);
+      }
+
+      EXPECT_EQ(2, c1_count) <<
+	"one-third of request should have come from first client";
+      EXPECT_EQ(4, c2_count) <<
+	"two-thirds of request should have come from second client";
+    }
+
+
+    TEST(dmclock_server_pull, pull_reservation) {
+      using ClientId = int;
+      using Queue = dmc::PullPriorityQueue<ClientId,Request>;
+      using QueueRef = std::unique_ptr<Queue>;
+
+      ClientId client1 = 52;
+      ClientId client2 = 8;
+      // No weight for either client; client1's reservation is twice client2's.
+      dmc::ClientInfo info1(2.0, 0.0, 0.0);
+      dmc::ClientInfo info2(1.0, 0.0, 0.0);
+
+      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo {
+	if (client1 == c) return info1;
+	else if (client2 == c) return info2;
+	else {
+	  ADD_FAILURE() << "client info looked up for non-existent client";
+	  return info1;
+	}
+      };
+
+      QueueRef pq(new Queue(client_info_f, false));
+
+      Request req;
+      ReqParams req_params(1,1);
+
+      // make sure all times are well before now
+      auto old_time = dmc::get_time() - 100.0;
+
+      for (int i = 0; i < 5; ++i) {
+	pq->add_request_time(req, client1, req_params, old_time);
+	pq->add_request_time(req, client2, req_params, old_time);
+	old_time += 0.001;
+      }
+
+      int c1_count = 0;
+      int c2_count = 0;
+      // Pull six of the ten requests and count them per client.
+      for (int i = 0; i < 6; ++i) {
+	Queue::PullReq pr = pq->pull_request();
+	EXPECT_EQ(Queue::NextReqType::returning, pr.type);
+	auto& retn = boost::get<Queue::PullReq::Retn>(pr.data);
+
+	if (client1 == retn.client) ++c1_count;
+	else if (client2 == retn.client) ++c2_count;
+	else ADD_FAILURE() << "got request from neither of two clients";
+
+	EXPECT_EQ(PhaseType::reservation, retn.phase);
+      }
+
+      EXPECT_EQ(4, c1_count) <<
+	"two-thirds of request should have come from first client";
+      EXPECT_EQ(2, c2_count) <<
+	"one-third of request should have come from second client";
+    } // dmclock_server_pull.pull_reservation
+
+
+    // Covers a request that is ready (under limit) but cannot be
+    // scheduled by proportion because its proportion tag is 0: between
+    // reservation slots the queue is expected to answer "future" and,
+    // once every request has been handed out, "none".
+    TEST(dmclock_server_pull, ready_and_under_limit) {
+      using ClientId = int;
+      using Queue = dmc::PullPriorityQueue<ClientId,Request>;
+      using QueueRef = std::unique_ptr<Queue>;
+
+      ClientId client1 = 52;
+      ClientId client2 = 8;
+
+      // both clients get identical settings; the values are presumably
+      // (reservation, weight, limit) -- TODO confirm against ClientInfo
+      dmc::ClientInfo info1(1.0, 0.0, 0.0);
+      dmc::ClientInfo info2(1.0, 0.0, 0.0);
+
+      auto client_info_f = [&] (ClientId c) -> dmc::ClientInfo {
+	if (client1 == c) return info1;
+	if (client2 == c) return info2;
+	ADD_FAILURE() << "client info looked up for non-existant client";
+	return info1;
+      };
+
+      QueueRef pq(new Queue(client_info_f, false));
+
+      Request request;
+      ReqParams params(1,1);
+
+      // make sure all times are well before now
+      auto start_time = dmc::get_time() - 100.0;
+
+      // add six requests, three per client, all with the same arrival
+      // time; for the same client, reservations are spaced one apart
+      for (int round = 0; round < 3; ++round) {
+	pq->add_request_time(request, client1, params, start_time);
+	pq->add_request_time(request, client2, params, start_time);
+      }
+
+      // two back-to-back pulls at the same instant that must both hand
+      // back a request
+      auto expect_two_returned = [&] (Time at) {
+	for (int n = 0; n < 2; ++n) {
+	  Queue::PullReq pr = pq->pull_request(at);
+	  EXPECT_EQ(Queue::NextReqType::returning, pr.type);
+	}
+      };
+
+      // first slot: two requests, then the next one is not due yet
+      expect_two_returned(start_time + 0.5);
+      Queue::PullReq pr = pq->pull_request(start_time + 0.5);
+      EXPECT_EQ(Queue::NextReqType::future, pr.type) <<
+	"too soon for next reservation";
+
+      // second slot: same pattern one time unit later
+      expect_two_returned(start_time + 1.5);
+      pr = pq->pull_request(start_time + 1.5);
+      EXPECT_EQ(Queue::NextReqType::future, pr.type) <<
+	"too soon for next reservation";
+
+      // third slot: the last two requests leave nothing queued
+      expect_two_returned(start_time + 2.5);
+      pr = pq->pull_request(start_time + 2.5);
+      EXPECT_EQ(Queue::NextReqType::none, pr.type) <<
+	"no more requests left";
+    }
+
+
+    // Pulling from a queue that never had a request added is expected to
+    // report NextReqType::none, here when asked about the time now + 100.
+    TEST(dmclock_server_pull, pull_none) {
+      using ClientId = int;
+      using Queue = dmc::PullPriorityQueue<ClientId,Request>;
+      using QueueRef = std::unique_ptr<Queue>;
+
+      dmc::ClientInfo info(1.0, 1.0, 1.0);
+
+      // every client id maps to the same info, so the argument is unused
+      auto client_info_f = [&] (ClientId) -> dmc::ClientInfo {
+	return info;
+      };
+
+      QueueRef pq(new Queue(client_info_f, false));
+
+      // nothing is enqueued on purpose; the pull happens on an empty queue
+      auto now = dmc::get_time();
+      Queue::PullReq pr = pq->pull_request(now + 100);
+
+      EXPECT_EQ(Queue::NextReqType::none, pr.type);
+    }
+
+
+    // A lone request is stamped at now + 100; pulling at now is expected to
+    // yield NextReqType::future whose Time payload equals now + 100.
+    TEST(dmclock_server_pull, pull_future) {
+      using ClientId = int;
+      using Queue = dmc::PullPriorityQueue<ClientId,Request>;
+      using QueueRef = std::unique_ptr<Queue>;
+
+      ClientId client1 = 52;
+
+      dmc::ClientInfo info(1.0, 0.0, 1.0);
+
+      // every client id gets the same info
+      auto client_info_f = [&] (ClientId) -> dmc::ClientInfo {
+	return info;
+      };
+
+      QueueRef pq(new Queue(client_info_f, false));
+
+      Request request;
+      ReqParams params(1,1);
+
+      // the request is deliberately stamped later than the pull time
+      auto now = dmc::get_time();
+      pq->add_request_time(request, client1, params, now + 100);
+
+      Queue::PullReq pr = pq->pull_request(now);
+      EXPECT_EQ(Queue::NextReqType::future, pr.type);
+      Time when = boost::get<Time>(pr.data);
+      EXPECT_EQ(now + 100, when);
+    }
+
+
+    // With the queue's second ctor argument true (presumably "allow limit
+    // break" -- confirm), a request stamped at now + 100 is returned at now.
+    TEST(dmclock_server_pull, pull_future_limit_break_weight) {
+      using ClientId = int;
+      using Queue = dmc::PullPriorityQueue<ClientId,Request>;
+      using QueueRef = std::unique_ptr<Queue>;
+
+      ClientId client1 = 52;
+
+      dmc::ClientInfo info(0.0, 1.0, 1.0);
+
+      auto client_info_f = [&] (ClientId) -> dmc::ClientInfo {
+	return info;
+      };
+
+      QueueRef pq(new Queue(client_info_f, true));
+
+      Request request;
+      ReqParams params(1,1);
+
+      // the request is deliberately stamped later than the pull time
+      auto now = dmc::get_time();
+      pq->add_request_time(request, client1, params, now + 100);
+
+      Queue::PullReq pr = pq->pull_request(now);
+      EXPECT_EQ(Queue::NextReqType::returning, pr.type);
+
+      auto& retn = boost::get<Queue::PullReq::Retn>(pr.data);
+      EXPECT_EQ(client1, retn.client);
+    }
+
+
+    // Variant with ClientInfo(1.0, 0.0, 1.0): the queue is again built with
+    // its second ctor argument true, and the now + 100 request is returned.
+    TEST(dmclock_server_pull, pull_future_limit_break_reservation) {
+      using ClientId = int;
+      using Queue = dmc::PullPriorityQueue<ClientId,Request>;
+      using QueueRef = std::unique_ptr<Queue>;
+
+      ClientId client1 = 52;
+
+      dmc::ClientInfo info(1.0, 0.0, 1.0);
+
+      auto client_info_f = [&] (ClientId) -> dmc::ClientInfo {
+	return info;
+      };
+
+      QueueRef pq(new Queue(client_info_f, true));
+
+      Request request;
+      ReqParams params(1,1);
+
+      // the request is deliberately stamped later than the pull time
+      auto now = dmc::get_time();
+      pq->add_request_time(request, client1, params, now + 100);
+
+      Queue::PullReq pr = pq->pull_request(now);
+      EXPECT_EQ(Queue::NextReqType::returning, pr.type);
+
+      auto& retn = boost::get<Queue::PullReq::Retn>(pr.data);
+      EXPECT_EQ(client1, retn.client);
+    }
+  } // namespace dmclock
+} // namespace crimson
diff --git a/test/test_test_client.cc b/test/test_test_client.cc
new file mode 100644
index 00000000000..6015cb9bf7b
--- /dev/null
+++ b/test/test_test_client.cc
@@ -0,0 +1,123 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Copyright (C) 2016 Red Hat Inc.
+ */
+
+#include <atomic>
+#include <thread>
+#include <chrono>
+#include <iostream>
+
+#include "gtest/gtest.h"
+
+#include "sim_recs.h"
+#include "sim_client.h"
+
+#include "test_dmclock.h"
+
+
+using namespace std::placeholders;
+
+namespace dmc = crimson::dmclock;
+namespace test = crimson::test_dmc;
+namespace sim = crimson::qos_simulation;
+
+using TimePoint = std::chrono::time_point<std::chrono::system_clock>;
+// Current system_clock reading; the tests use it to time client runs.
+static TimePoint now() { return TimePoint::clock::now(); }
+
+
+// Runs 1000 ops at a 100 iops goal, answering each request from inside the
+// first callback; wall-clock time is expected to land between 10 and 12s.
+TEST(test_client, full_bore_timing) {
+  std::atomic_ulong count(0);
+  ServerId server_id = 3;
+  sim::TestResponse resp(0);
+  dmc::PhaseType resp_params = dmc::PhaseType::priority;
+  // NOTE(review): the callback derefs this; confirm it cannot run before set
+  test::DmcClient* client = nullptr;
+  auto start = now();
+  client = new test::DmcClient(ClientId(0),
+			[&] (const ServerId& server,
+			     const sim::TestRequest& req,
+			     const ClientId& client_id,
+			     const dmc::ReqParams& req_params) {
+			  ++count;
+			  client->receive_response(resp, client_id, resp_params);
+			},
+			[&] (const uint64_t seed) -> ServerId& {
+			  return server_id;
+			},
+			test::dmc_client_accumulate_f,
+			1000, // ops to run
+			100, // iops goal
+			5); // outstanding ops allowed
+  client->wait_until_done();
+  auto end = now();
+  EXPECT_EQ(1000u, count) << "didn't get right number of ops";
+
+  int milliseconds = (end - start) / std::chrono::milliseconds(1);
+  EXPECT_LT(10000, milliseconds) << "timing too fast to be correct";
+  EXPECT_GT(12000, milliseconds) << "timing suspiciously slow";
+  delete client; // run is finished; release the client instead of leaking it
+}
+
+
+// Requests go unanswered until a helper thread, after sleeping 5s, replies to
+// the 50 stalled ops and enables auto-respond; overall timing is then checked.
+TEST(test_client, paused_timing) {
+  std::atomic_ulong count(0);
+  std::atomic_ulong unresponded_count(0);
+  std::atomic_bool auto_respond(false);
+  ClientId my_client_id = 0;
+  ServerId server_id = 3;
+  sim::TestResponse resp(0);
+  dmc::PhaseType resp_params = dmc::PhaseType::priority;
+  test::DmcClient* client = nullptr;
+
+  auto start = now();
+  client =
+    new test::DmcClient(my_client_id,
+			[&] (const ServerId& server,
+			     const sim::TestRequest& req,
+			     const ClientId& client_id,
+			     const dmc::ReqParams& req_params) {
+			  ++count;
+			  if (auto_respond.load()) {
+			    client->receive_response(resp, client_id, resp_params);
+			  } else {
+			    ++unresponded_count;
+			  }
+			},
+			[&] (const uint64_t seed) -> ServerId& {
+			  return server_id;
+			},
+			test::dmc_client_accumulate_f,
+			1000, // ops to run
+			100, // iops goal
+			50); // outstanding ops allowed
+  std::thread t([&]() {
+      std::this_thread::sleep_for(std::chrono::seconds(5));
+      EXPECT_EQ(50u, unresponded_count.load()) <<
+	"should have 50 unresponded calls";
+      auto_respond = true;
+      // respond to those 50 calls
+      for(int i = 0; i < 50; ++i) {
+	client->receive_response(resp, my_client_id, resp_params);
+	--unresponded_count;
+      }
+    });
+
+  client->wait_until_done();
+  auto end = now();
+  int milliseconds = (end - start) / std::chrono::milliseconds(1);
+
+  // the 50 outstanding ops allowed means the first half-second of
+  // requests get responded to during the 5 second pause. So we have
+  // to adjust our expectations by a half-second.
+  EXPECT_LT(15000 - 500, milliseconds) << "timing too fast to be correct";
+  EXPECT_GT(17000 - 500, milliseconds) << "timing suspiciously slow";
+  t.join();
+  delete client; // helper thread is joined, so nothing else uses the client
+}
-- 
2.39.5