ceph_perf_local_SOURCES = test/perf_local.cc test/perf_helper.cc
ceph_perf_local_LDADD = $(LIBOS) $(CEPH_GLOBAL)
# Per-target C++ flags: the common AM_CXXFLAGS plus INTEL_SSE_FLAGS and
# INTEL_SSE2_FLAGS. Those two variables are defined outside this fragment;
# presumably they carry the compiler options that enable SSE/SSE2 -- confirm
# against the configure script.
+ceph_perf_local_CXXFLAGS = ${AM_CXXFLAGS} \
+ ${INTEL_SSE_FLAGS} \
+ ${INTEL_SSE2_FLAGS}
+
noinst_HEADERS += test/perf_helper.h
bin_DEBUGPROGRAMS += ceph_perf_local
// * Create a new entry for the test in the #tests table.
#include <vector>
#include <sched.h>
-#if defined(__x86_64__) || defined(__amd64__)
+
+#include "acconfig.h"
+#ifdef HAVE_SSE
#include <xmmintrin.h>
#endif
}
-#if defined(__x86_64__) || defined(__amd64__)
+#ifdef HAVE_SSE
/**
* Prefetch the cache lines containing [object, object + num_bytes) into the
* processor's caches.
*
* Cache lines are treated as 64 bytes wide: the walk starts at object's
* address rounded down to a 64-byte boundary and issues one _mm_prefetch()
* with the _MM_HINT_T0 hint per 64-byte step until the end of the range is
* covered.
*
* object: start of the region to prefetch.
* num_bytes: length of the region, in bytes.
*/
static inline void prefetch(const void *object, uint64_t num_bytes)
{
-#if defined(__x86_64__) || defined(__amd64__)
// Distance of object from the start of the 64-byte line that contains it.
uint64_t offset = reinterpret_cast<uint64_t>(object) & 0x3fUL;
// Start of that 64-byte line.
const char* p = reinterpret_cast<const char*>(object) - offset;
// The bound includes offset because p starts that many bytes before object.
for (uint64_t i = 0; i < offset + num_bytes; i += 64)
_mm_prefetch(p + i, _MM_HINT_T0);
-#endif
}
#endif
// Measure the cost of the prefetch instruction.
double perf_prefetch()
{
-#if defined(__x86_64__) || defined(__amd64__)
+#ifdef HAVE_SSE
uint64_t total_ticks = 0;
int count = 10;
char buf[16 * 64];
// Measure the cost of an lfence instruction.
//
// Returns the time between the two Cycles::rdtsc() readings taken around the
// timing loop, converted to seconds and divided by the iteration count
// (1000000). Returns -1 when HAVE_SSE2 is not defined, i.e. when the
// measurement is compiled out (lfence is an SSE2 instruction, hence the
// SSE2 rather than SSE guard).
double lfence()
{
+#ifdef HAVE_SSE2
int count = 1000000;
uint64_t start = Cycles::rdtsc();
for (int i = 0; i < count; i++) {
}
uint64_t stop = Cycles::rdtsc();
return Cycles::to_seconds(stop - start)/count;
+#else
+ return -1;
+#endif
}
// Measure the cost of an sfence instruction.
//
// Returns the time between the two Cycles::rdtsc() readings taken around the
// timing loop, converted to seconds and divided by the iteration count
// (1000000). Returns -1 when HAVE_SSE is not defined, i.e. when the
// measurement is compiled out (sfence is an SSE instruction, so the plain
// SSE guard is sufficient).
double sfence()
{
+#ifdef HAVE_SSE
int count = 1000000;
uint64_t start = Cycles::rdtsc();
for (int i = 0; i < count; i++) {
}
uint64_t stop = Cycles::rdtsc();
return Cycles::to_seconds(stop - start)/count;
+#else
+ return -1;
+#endif
}
// Measure the cost of acquiring and releasing a SpinLock (assuming the