From: Chunsong Feng Date: Sat, 26 Dec 2020 09:06:06 +0000 (+0800) Subject: test/perf_local: Add div32,prefetch,memory barrier bench for arm64 X-Git-Tag: v16.1.0~163^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F38719%2Fhead;p=ceph.git test/perf_local: Add div32,prefetch,memory barrier bench for arm64 arm64 supports div32, prefetch, and memory barrier instructions, so add them to the benchmark. Signed-off-by: Chunsong Feng Signed-off-by: luo rixin --- diff --git a/src/test/perf_local.cc b/src/test/perf_local.cc index 9df3b90cf4a..ecd7dc79246 100644 --- a/src/test/perf_local.cc +++ b/src/test/perf_local.cc @@ -381,6 +381,18 @@ double div32() } uint64_t stop = Cycles::rdtsc(); return Cycles::to_seconds(stop - start)/count; +#elif defined(__aarch64__) + int count = 1000000; + uint64_t start = Cycles::rdtsc(); + uint64_t numerator = 0xa5a5a5a555aa55aaUL; + uint32_t divisor = 0xaa55aa55U; + uint32_t result; + for (int i = 0; i < count; i++) { + asm volatile("udiv %0, %1, %2" : "=r"(result) : + "r"(numerator), "r"(divisor)); + } + uint64_t stop = Cycles::rdtsc(); + return Cycles::to_seconds(stop - start)/count; #else return -1; #endif @@ -610,12 +622,20 @@ static inline void prefetch(const void *object, uint64_t num_bytes) for (uint64_t i = 0; i < offset + num_bytes; i += 64) _mm_prefetch(p + i, _MM_HINT_T0); } +#elif defined(__aarch64__) +static inline void prefetch(const void *object, uint64_t num_bytes) +{ + uint64_t offset = reinterpret_cast<uint64_t>(object) & 0x3fUL; + const char* ptr = reinterpret_cast<const char*>(object) - offset; + for (uint64_t i = 0; i < offset + num_bytes; i += 64, ptr += 64) + asm volatile("prfm pldl1keep, %a0\n" : : "p" (ptr)); +} #endif // Measure the cost of the prefetch instruction. 
double perf_prefetch() { -#ifdef HAVE_SSE +#if defined(HAVE_SSE) || defined(__aarch64__) uint64_t total_ticks = 0; int count = 10; char buf[16 * 64]; @@ -692,6 +712,14 @@ double lfence() } uint64_t stop = Cycles::rdtsc(); return Cycles::to_seconds(stop - start)/count; +#elif defined(__aarch64__) + int count = 1000000; + uint64_t start = Cycles::rdtsc(); + for (int i = 0; i < count; i++) { + asm volatile("dmb ishld" ::: "memory"); + } + uint64_t stop = Cycles::rdtsc(); + return Cycles::to_seconds(stop - start)/count; #else return -1; #endif @@ -708,6 +736,14 @@ double sfence() } uint64_t stop = Cycles::rdtsc(); return Cycles::to_seconds(stop - start)/count; +#elif defined(__aarch64__) + int count = 1000000; + uint64_t start = Cycles::rdtsc(); + for (int i = 0; i < count; i++) { + asm volatile("dmb ishst" ::: "memory"); + } + uint64_t stop = Cycles::rdtsc(); + return Cycles::to_seconds(stop - start)/count; #else return -1; #endif