// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
//
// common: pretty_binary_string conversions
//
// Origin: commit 6b254a78729d79d424ab234d5ca3e8855c374fde (Adam Kupczyk,
// 2020-04-28), "Moved all incarnations of pretty binary printing into one
// common place."  That change removes the private copies that lived in
// src/kv/RocksDBStore.h, src/os/bluestore/BlueStore.cc and
// src/os/kstore/KStore.cc, makes those files include
// "common/pretty_binary.h", and adds pretty_binary.cc to common_srcs in
// src/common/CMakeLists.txt.
//
// Text format produced by pretty_binary_string() and parsed back by
// pretty_binary_string_reverse():
//   * a run of printable bytes (32..126) is written between single quotes,
//     with every quote character inside the run doubled ('');
//   * any other run is written as "0x" followed by two upper-case hex
//     digits per byte;
//   * runs are simply concatenated, e.g. 'foo'0x0102'bars'.

#include <cstddef>
#include <cstdint>
#include <sstream>
#include <stdexcept>
#include <string>

/// Render a binary string in the quoted-text / hex format described above.
///
/// @tparam S  any type offering empty(), length() and operator[] that yields
///            a value convertible to unsigned char (std::string is what the
///            unit tests use).
/// @param bin bytes to render.
/// @return    the printable rendering; an empty input yields an empty string.
template<typename S>
static std::string pretty_binary_string(const S& bin)
{
  std::string pretty;
  if (bin.empty())
    return pretty;
  pretty.reserve(bin.length() * 3);
  auto printable = [](unsigned char c) -> bool {
    return (c >= 32) && (c <= 126);
  };
  auto append_hex = [&](unsigned char c) {
    static const char hex[16] = {'0', '1', '2', '3',
                                 '4', '5', '6', '7',
                                 '8', '9', 'A', 'B',
                                 'C', 'D', 'E', 'F'};
    pretty.push_back(hex[c / 16]);
    pretty.push_back(hex[c % 16]);
  };
  // prologue: the very first byte alone decides the initial mode
  bool strmode = printable(bin[0]);
  if (strmode) {
    pretty.push_back('\'');
  } else {
    pretty.push_back('0');
    pretty.push_back('x');
  }
  for (std::size_t i = 0; i < bin.length(); ++i) {
    if (strmode) {
      // leave string mode as soon as a non-printable byte shows up
      if (!printable(bin[i])) {
        pretty.push_back('\'');
        pretty.push_back('0');
        pretty.push_back('x');
        strmode = false;
      }
    } else {
      // change mode from hex to str only if this byte and the two after it
      // are all printable; shorter printable stretches stay in hex
      if (i + 2 < bin.length() &&
          printable(bin[i]) &&
          printable(bin[i + 1]) &&
          printable(bin[i + 2])) {
        pretty.push_back('\'');
        strmode = true;
      }
    }
    if (strmode) {
      // a quote inside a quoted run is escaped by doubling it
      if (bin[i] == '\'')
        pretty.push_back('\'');
      pretty.push_back(bin[i]);
    } else {
      append_hex(bin[i]);
    }
  }
  // epilogue: close a quoted run that is still open
  if (strmode) {
    pretty.push_back('\'');
  }
  return pretty;
}

std::string pretty_binary_string_reverse(const std::string& pretty);

/// Parse the output of pretty_binary_string() back into raw bytes.
///
/// Hex digits are accepted in either case.
///
/// @param pretty text in the quoted-text / hex format.
/// @return       the decoded bytes; an empty input yields an empty string.
/// @throws std::invalid_argument with the message
///         "invalid char at pos <N> of <pretty>" when the text does not start
///         with a quote or "0x", when "0x" is the last thing in the input,
///         when a hex byte is incomplete or contains a non-hex character, or
///         when the input ends inside a quoted run.
std::string pretty_binary_string_reverse(const std::string& pretty)
{
  std::size_t i = 0;
  auto raise = [&](std::size_t failpos) {
    std::ostringstream ss;
    ss << "invalid char at pos " << failpos << " of " << pretty;
    throw std::invalid_argument(ss.str());
  };
  // value of a hex digit, or -1 when c is not one
  auto hexdigit = [](unsigned char c) -> std::int32_t {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return -1;
  };
  // consume exactly the character c at the cursor or fail
  auto require = [&](char c) {
    if (i >= pretty.length() || pretty[i] != c) {
      raise(i);
    }
    ++i;
  };
  std::string bin;
  if (pretty.empty())
    return bin;
  bin.reserve(pretty.length());
  // initialised so the variable is never read indeterminate; the default
  // branch below always throws
  bool strmode = false;
  switch (pretty[0]) {
  case '\'':
    ++i;
    strmode = true;
    break;
  case '0':
    ++i;
    require('x');
    if (i == pretty.length()) {
      // "0x" with nothing after it
      raise(i);
    }
    strmode = false;
    break;
  default:
    raise(0);
  }
  while (i < pretty.length()) {
    if (strmode) {
      if (pretty[i] == '\'') {
        if (i + 1 < pretty.length() && pretty[i + 1] == '\'') {
          // doubled quote: one literal quote character
          bin.push_back('\'');
          i += 2;
        } else {
          // closing quote
          ++i;
          strmode = false;
          // With two or more characters left they must be "0x" plus data.
          // A single leftover character is rejected by the hex branch on
          // the next iteration.
          if (i + 1 < pretty.length()) {
            require('0');
            require('x');
            if (i == pretty.length()) {
              raise(i);
            }
          }
        }
      } else {
        bin.push_back(pretty[i]);
        ++i;
      }
    } else {
      if (pretty[i] != '\'') {
        const std::int32_t hex0 = hexdigit(pretty[i]);
        if (hex0 < 0) {
          raise(i);
        }
        ++i;
        if (i >= pretty.length()) {
          // odd number of hex digits
          raise(i);
        }
        const std::int32_t hex1 = hexdigit(pretty[i]);
        if (hex1 < 0) {
          raise(i);
        }
        bin.push_back(static_cast<char>(hex0 * 0x10 + hex1));
        ++i;
      } else {
        // opening quote of the next printable run
        strmode = true;
        ++i;
      }
    }
  }
  if (strmode)
    raise(i);  // input ended inside a quoted run
  return bin;
}
4) { - // print a whole u32 at once - snprintf(buf, sizeof(buf), "%08x", - (uint32_t)(((unsigned char)in[i] << 24) | - ((unsigned char)in[i+1] << 16) | - ((unsigned char)in[i+2] << 8) | - ((unsigned char)in[i+3] << 0))); - i += 3; - } else { - snprintf(buf, sizeof(buf), "%02x", (int)(unsigned char)in[i]); - } - out.append(buf); - } else { - if (mode != STRING) { - out.push_back('\''); - mode = STRING; - from = i; - } - } - } - if (mode == STRING) { - out.append(in.substr(from, i - from)); - out.push_back('\''); - } - return out; - } void Put(const rocksdb::Slice& key, const rocksdb::Slice& value) override { std::string prefix ((key.ToString()).substr(0,1)); diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index fe617344530..66ec2fa6c0a 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -44,6 +44,7 @@ #include "perfglue/heap_profiler.h" #include "common/blkdev.h" #include "common/numa.h" +#include "common/pretty_binary.h" #if defined(WITH_LTTNG) #define TRACEPOINT_DEFINE @@ -251,58 +252,6 @@ static int decode_escaped(const char *p, string *out) return p - orig_p; } -// some things we encode in binary (as le32 or le64); print the -// resulting key strings nicely -template -static string pretty_binary_string(const S& in) -{ - char buf[10]; - string out; - out.reserve(in.length() * 3); - enum { NONE, HEX, STRING } mode = NONE; - unsigned from = 0, i; - for (i=0; i < in.length(); ++i) { - if ((in[i] < 32 || (unsigned char)in[i] > 126) || - (mode == HEX && in.length() - i >= 4 && - ((in[i] < 32 || (unsigned char)in[i] > 126) || - (in[i+1] < 32 || (unsigned char)in[i+1] > 126) || - (in[i+2] < 32 || (unsigned char)in[i+2] > 126) || - (in[i+3] < 32 || (unsigned char)in[i+3] > 126)))) { - if (mode == STRING) { - out.append(in.c_str() + from, i - from); - out.push_back('\''); - } - if (mode != HEX) { - out.append("0x"); - mode = HEX; - } - if (in.length() - i >= 4) { - // print a whole u32 at once - snprintf(buf, 
sizeof(buf), "%08x", - (uint32_t)(((unsigned char)in[i] << 24) | - ((unsigned char)in[i+1] << 16) | - ((unsigned char)in[i+2] << 8) | - ((unsigned char)in[i+3] << 0))); - i += 3; - } else { - snprintf(buf, sizeof(buf), "%02x", (int)(unsigned char)in[i]); - } - out.append(buf); - } else { - if (mode != STRING) { - out.push_back('\''); - mode = STRING; - from = i; - } - } - } - if (mode == STRING) { - out.append(in.c_str() + from, i - from); - out.push_back('\''); - } - return out; -} - template static void _key_encode_shard(shard_id_t shard, T *key) { diff --git a/src/os/kstore/KStore.cc b/src/os/kstore/KStore.cc index ca1268faea7..8534a7ccff2 100644 --- a/src/os/kstore/KStore.cc +++ b/src/os/kstore/KStore.cc @@ -31,7 +31,7 @@ #include "common/errno.h" #include "common/safe_io.h" #include "common/Formatter.h" - +#include "common/pretty_binary.h" #define dout_context cct #define dout_subsys ceph_subsys_kstore @@ -135,57 +135,6 @@ static int decode_escaped(const char *p, string *out) return p - orig_p; } -// some things we encode in binary (as le32 or le64); print the -// resulting key strings nicely -static string pretty_binary_string(const string& in) -{ - char buf[10]; - string out; - out.reserve(in.length() * 3); - enum { NONE, HEX, STRING } mode = NONE; - unsigned from = 0, i; - for (i=0; i < in.length(); ++i) { - if ((in[i] < 32 || (unsigned char)in[i] > 126) || - (mode == HEX && in.length() - i >= 4 && - ((in[i] < 32 || (unsigned char)in[i] > 126) || - (in[i+1] < 32 || (unsigned char)in[i+1] > 126) || - (in[i+2] < 32 || (unsigned char)in[i+2] > 126) || - (in[i+3] < 32 || (unsigned char)in[i+3] > 126)))) { - if (mode == STRING) { - out.append(in.substr(from, i - from)); - out.push_back('\''); - } - if (mode != HEX) { - out.append("0x"); - mode = HEX; - } - if (in.length() - i >= 4) { - // print a whole u32 at once - snprintf(buf, sizeof(buf), "%08x", - (uint32_t)(((unsigned char)in[i] << 24) | - ((unsigned char)in[i+1] << 16) | - ((unsigned char)in[i+2] << 8) | 
- ((unsigned char)in[i+3] << 0))); - i += 3; - } else { - snprintf(buf, sizeof(buf), "%02x", (int)(unsigned char)in[i]); - } - out.append(buf); - } else { - if (mode != STRING) { - out.push_back('\''); - mode = STRING; - from = i; - } - } - } - if (mode == STRING) { - out.append(in.substr(from, i - from)); - out.push_back('\''); - } - return out; -} - static void _key_encode_shard(shard_id_t shard, string *key) { // make field ordering match with ghobject_t compare operations diff --git a/src/test/common/CMakeLists.txt b/src/test/common/CMakeLists.txt index e67d9718b29..dbb2f63b0a9 100644 --- a/src/test/common/CMakeLists.txt +++ b/src/test/common/CMakeLists.txt @@ -193,6 +193,13 @@ add_executable(unittest_url_escape add_ceph_unittest(unittest_url_escape) target_link_libraries(unittest_url_escape ceph-common) +# unittest_pretty_binary +add_executable(unittest_pretty_binary + test_pretty_binary.cc + ) +add_ceph_unittest(unittest_pretty_binary) +target_link_libraries(unittest_pretty_binary ceph-common) + # unittest_readahead add_executable(unittest_readahead Readahead.cc diff --git a/src/test/common/test_pretty_binary.cc b/src/test/common/test_pretty_binary.cc new file mode 100644 index 00000000000..837dbefc121 --- /dev/null +++ b/src/test/common/test_pretty_binary.cc @@ -0,0 +1,50 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/pretty_binary.h" + +#include "gtest/gtest.h" + +TEST(pretty_binary, print) { + ASSERT_EQ(pretty_binary_string(std::string("foo\001\002bars")), std::string("'foo'0x0102'bars'")); + ASSERT_EQ(pretty_binary_string(std::string("foo''bars")), std::string("'foo''''bars'")); + ASSERT_EQ(pretty_binary_string(std::string("foo\377 \200!!")), std::string("'foo'0xFF2020802121")); + ASSERT_EQ(pretty_binary_string(std::string("\001\002\003\004")), std::string("0x01020304")); +} + +TEST(pretty_binary, unprint) { + 
ASSERT_EQ(pretty_binary_string_reverse(std::string("'foo'0x0102'bars'")), std::string("foo\001\002bars")); + ASSERT_EQ(pretty_binary_string_reverse(std::string("'foo''''bars'")), std::string("foo''bars")); + ASSERT_EQ(pretty_binary_string_reverse(std::string("'foo'0xFF2020802121")), std::string("foo\377 \200!!")); + ASSERT_EQ(pretty_binary_string_reverse(std::string("0x01020304")),std::string("\001\002\003\004")); +} + +TEST(pretty_binary, all_chars) { + std::string a; + for (unsigned j = 0; j < 256; ++j) { + a.push_back((char)j); + } + std::string b = pretty_binary_string(a); + ASSERT_EQ(a, pretty_binary_string_reverse(b)); +} + +TEST(pretty_binary, random) { + for (size_t i = 0; i < 100000; i++) { + std::string a; + size_t r = rand() % 100; + for (size_t j = 0; j < r; ++j) { + a.push_back((unsigned char)rand()); + } + std::string b = pretty_binary_string(a); + ASSERT_EQ(a, pretty_binary_string_reverse(b)); + } +} + +TEST(pretty_binary, invalid) { + ASSERT_THROW(pretty_binary_string_reverse("'"), std::invalid_argument); + ASSERT_THROW(pretty_binary_string_reverse("0x1"), std::invalid_argument); + ASSERT_THROW(pretty_binary_string_reverse("0x"), std::invalid_argument); + ASSERT_THROW(pretty_binary_string_reverse("'''"), std::invalid_argument); + ASSERT_THROW(pretty_binary_string_reverse("'a'x"), std::invalid_argument); + ASSERT_THROW(pretty_binary_string_reverse("'a'0x"), std::invalid_argument); +} -- 2.39.5