From: Sage Weil
Date: Sat, 18 Jun 2016 16:54:10 +0000 (-0400)
Subject: include/small_encoding: small encode/decode helpers
X-Git-Tag: v11.0.0~91^2~6
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=ed0dd72c4e12875a19d16b27630d96b38c75d099;p=ceph.git

include/small_encoding: small encode/decode helpers

Signed-off-by: Sage Weil
---

diff --git a/src/include/Makefile.am b/src/include/Makefile.am
index d17605fb1734..a4bc1ddcceaa 100644
--- a/src/include/Makefile.am
+++ b/src/include/Makefile.am
@@ -96,6 +96,7 @@ noinst_HEADERS += \
 	include/rangeset.h \
 	include/rados.h \
 	include/rbd_types.h \
+	include/small_encoding.h \
 	include/statlite.h \
 	include/str_list.h \
 	include/str_map.h \
diff --git a/src/include/small_encoding.h b/src/include/small_encoding.h
new file mode 100644
index 000000000000..5efca0cfb2bf
--- /dev/null
+++ b/src/include/small_encoding.h
@@ -0,0 +1,294 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_SMALL_ENCODING_H
+#define CEPH_SMALL_ENCODING_H
+
+#include <algorithm>
+#include <vector>
+
+#include "include/buffer.h"
+#include "include/int_types.h"
+
+// varint encoding
+//
+// high bit of every byte indicates whether another byte follows.
+//
+// the value is emitted 7 bits at a time, least significant bits first.
+template<typename T>
+inline void small_encode_varint(T v, bufferlist& bl) {
+  uint8_t byte = v & 0x7f;
+  v >>= 7;
+  while (v) {
+    byte |= 0x80;
+    ::encode(byte, bl);
+    byte = (v & 0x7f);
+    v >>= 7;
+  }
+  ::encode(byte, bl);
+}
+
+// decode a value written by small_encode_varint() into v, advancing p.
+template<typename T>
+inline void small_decode_varint(T& v, bufferlist::iterator& p)
+{
+  uint8_t byte;
+  ::decode(byte, p);
+  v = byte & 0x7f;
+  int shift = 7;
+  while (byte & 0x80) {
+    ::decode(byte, p);
+    v |= (T)(byte & 0x7f) << shift;
+    shift += 7;
+  }
+}
+
+// signed varint encoding
+//
+// low bit = 1 = negative, 0 = positive
+// high bit of every byte indicates whether another byte follows.
+//
+// (so, 6 bits of magnitude in first byte, 7 bits data thereafter)
+template<typename T>
+inline void small_encode_signed_varint(T v, bufferlist& bl) {
+  uint8_t byte = 0;
+  if (v < 0) {
+    v = -v;  // encode the magnitude; the sign lives in bit 0
+    byte = 1;
+  }
+  byte |= (v & 0x3f) << 1;
+  v >>= 6;
+  while (v) {
+    byte |= 0x80;
+    ::encode(byte, bl);
+    byte = (v & 0x7f);
+    v >>= 7;
+  }
+  ::encode(byte, bl);
+}
+
+// decode a value written by small_encode_signed_varint() into v.
+template<typename T>
+inline void small_decode_signed_varint(T& v, bufferlist::iterator& p)
+{
+  uint8_t byte;
+  ::decode(byte, p);
+  bool negative = byte & 1;
+  v = (byte & 0x7e) >> 1;
+  int shift = 6;
+  while (byte & 0x80) {
+    ::decode(byte, p);
+    v |= (T)(byte & 0x7f) << shift;
+    shift += 7;
+  }
+  if (negative) {
+    v = -v;
+  }
+}
+
+// varint + lowz encoding
+//
+// first(low) 2 bits = how many low zero bits (nibbles)
+// high bit of each byte = another byte follows
+// (so, 5 bits data in first byte, 7 bits data thereafter)
+template<typename T>
+inline void small_encode_varint_lowz(T v, bufferlist& bl) {
+  int lowz = v ? (ctz(v) / 4) : 0;
+  uint8_t byte = std::min(lowz, 3);  // the 2-bit field holds at most 3
+  v >>= byte * 4;
+  byte |= (((uint8_t)v << 2) & 0x7c);
+  v >>= 5;
+  while (v) {
+    byte |= 0x80;
+    ::encode(byte, bl);
+    byte = (v & 0x7f);
+    v >>= 7;
+  }
+  ::encode(byte, bl);
+}
+
+// decode a value written by small_encode_varint_lowz() into v.
+template<typename T>
+inline void small_decode_varint_lowz(T& v, bufferlist::iterator& p)
+{
+  uint8_t byte;
+  ::decode(byte, p);
+  int shift = (byte & 3) * 4;
+  v = ((byte >> 2) & 0x1f) << shift;
+  shift += 5;
+  while (byte & 0x80) {
+    ::decode(byte, p);
+    v |= (T)(byte & 0x7f) << shift;
+    shift += 7;
+  }
+}
+
+// signed varint + lowz encoding
+//
+// first low bit = 1 for negative, 0 for positive
+// next 2 bits = how many low zero bits (nibbles)
+// high bit of each byte = another byte follows
+// (so, 4 bits data in first byte, 7 bits data thereafter)
+template<typename T>
+inline void small_encode_signed_varint_lowz(T v, bufferlist& bl) {
+  uint8_t byte = 0;
+  if (v < 0) {
+    v = -v;
+    byte = 1;
+  }
+  int lowz = v ? (ctz(v) / 4) : 0;
+  lowz = std::min(lowz, 3);
+  byte |= lowz << 1;
+  v >>= lowz * 4;
+  byte |= (((uint8_t)v << 3) & 0x78);
+  v >>= 4;
+  while (v) {
+    byte |= 0x80;
+    ::encode(byte, bl);
+    byte = (v & 0x7f);
+    v >>= 7;
+  }
+  ::encode(byte, bl);
+}
+
+// decode a value written by small_encode_signed_varint_lowz() into v.
+template<typename T>
+inline void small_decode_signed_varint_lowz(T& v, bufferlist::iterator& p)
+{
+  uint8_t byte;
+  ::decode(byte, p);
+  bool negative = byte & 1;
+  int shift = (byte & 6) * 2;  // (lowz << 1) * 2 == lowz * 4
+  v = ((byte >> 3) & 0xf) << shift;
+  shift += 4;
+  while (byte & 0x80) {
+    ::decode(byte, p);
+    v |= (T)(byte & 0x7f) << shift;
+    shift += 7;
+  }
+  if (negative) {
+    v = -v;
+  }
+}
+
+
+// LBA
+//
+// first 1-3 bits = how many low zero bits
+//     *0 = 12 (common 4 K alignment case)
+//    *01 = 16
+//   *011 = 20
+//   *111 = byte
+// then 28-30 bits of data
+// then last bit = another byte follows
+// high bit of each subsequent byte = another byte follows
+inline void small_encode_lba(uint64_t v, bufferlist& bl) {
+  int low_zero_nibbles = v ? (int)(ctz(v) / 4) : 0;
+  int pos;
+  uint32_t word;
+  int t = low_zero_nibbles - 3;
+  if (t < 0) {
+    pos = 3;
+    word = 0x7;
+  } else if (t < 3) {
+    v >>= (low_zero_nibbles * 4);
+    pos = t + 1;
+    word = (1 << t) - 1;
+  } else {
+    // 6 or more low zero nibbles: the prefix can express at most 20
+    // zero bits, so drop exactly 20; the decoder shifts them back in.
+    v >>= 20;
+    pos = 3;
+    word = 0x3;
+  }
+  word |= (v << pos) & 0x7fffffff;
+  v >>= 31 - pos;
+  if (!v) {
+    ::encode(word, bl);
+    return;
+  }
+  word |= 0x80000000;
+  ::encode(word, bl);
+  uint8_t byte = v & 0x7f;
+  v >>= 7;
+  while (v) {
+    byte |= 0x80;
+    ::encode(byte, bl);
+    byte = (v & 0x7f);
+    v >>= 7;
+  }
+  ::encode(byte, bl);
+}
+
+// decode a value written by small_encode_lba() into v.
+inline void small_decode_lba(uint64_t& v, bufferlist::iterator& p) {
+  uint32_t word;
+  ::decode(word, p);
+  int shift;
+  switch (word & 7) {
+  case 0:
+  case 2:
+  case 4:
+  case 6:
+    v = (uint64_t)(word & 0x7ffffffe) << (12 - 1);
+    shift = 12 + 30;
+    break;
+  case 1:
+  case 5:
+    v = (uint64_t)(word & 0x7ffffffc) << (16 - 2);
+    shift = 16 + 29;
+    break;
+  case 3:
+    v = (uint64_t)(word & 0x7ffffff8) << (20 - 3);
+    shift = 20 + 28;
+    break;
+  case 7:
+    v = (uint64_t)(word & 0x7ffffff8) >> 3;
+    shift = 28;
+  }
+  uint8_t byte = word >> 24;
+  while (byte & 0x80) {
+    ::decode(byte, p);
+    v |= (uint64_t)(byte & 0x7f) << shift;
+    shift += 7;
+  }
+}
+
+
+// short bufferptrs, bufferlists, strings
+template<typename T>
+inline void small_encode_buf_lowz(const T& bp, bufferlist& bl) {
+  size_t l = bp.length();
+  small_encode_varint_lowz(l, bl);
+  bl.append(bp);
+}
+template<typename T>
+inline void small_decode_buf_lowz(T& bp, bufferlist::iterator& p) {
+  size_t l;
+  small_decode_varint_lowz(l, p);
+  p.copy(l, bp);
+}
+
+// STL containers
+//
+// element count as a varint, then each element via its own encode()/decode().
+
+template<typename T>
+inline void small_encode_obj(const std::vector<T>& v, bufferlist& bl) {
+  size_t n = v.size();
+  small_encode_varint(n, bl);
+  for (auto p = v.cbegin(); p != v.cend(); ++p) {
+    p->encode(bl);
+  }
+}
+template<typename T>
+inline void small_decode_obj(std::vector<T>& v, bufferlist::iterator& p) {
+  size_t n;
+  small_decode_varint(n, p);
+  v.clear();
+  while (n--) {
+    v.push_back(T());
+    v.back().decode(p);
+  }
+}
+
+#endif
diff --git a/src/test/encoding.cc b/src/test/encoding.cc
index da4d6fb04ef0..5469c42109ac 100644
--- a/src/test/encoding.cc
+++ b/src/test/encoding.cc
@@ -1,6 +1,7 @@
 #include "common/config.h"
 #include "include/buffer.h"
 #include "include/encoding.h"
+#include "include/small_encoding.h"
 
 #include "gtest/gtest.h"
 
@@ -223,3 +224,181 @@ TEST(EncodingException, Macros) {
     }
   }
 }
+
+
+TEST(small_encoding, varint) {
+  uint32_t v[][4] = {
+    /* value, varint bytes, signed varint bytes, signed varint bytes (neg) */
+    {0, 1, 1, 1},
+    {1, 1, 1, 1},
+    {2, 1, 1, 1},
+    {31, 1, 1, 1},
+    {32, 1, 1, 1},
+    {0xff, 2, 2, 2},
+    {0x100, 2, 2, 2},
+    {0xfff, 2, 2, 2},
+    {0x1000, 2, 2, 2},
+    {0x2000, 2, 3, 3},
+    {0x3fff, 2, 3, 3},
+    {0x4000, 3, 3, 3},
+    {0x4001, 3, 3, 3},
+    {0x10001, 3, 3, 3},
+    {0x20001, 3, 3, 3},
+    {0x40001, 3, 3, 3},
+    {0x80001, 3, 3, 3},
+    {0x7f0001, 4, 4, 4},
+    {0xff00001, 4, 5, 5},
+    {0x1ff00001, 5, 5, 5},
+    {0xffff0001, 5, 5, 5},
+    {0xffffffff, 5, 5, 5},
+    {1074790401, 5, 5, 5},
+    {0, 0, 0, 0}
+  };
+  for (unsigned i=0; v[i][1]; ++i) {
+    {
+      bufferlist bl;
+      small_encode_varint(v[i][0], bl);
+      cout << std::hex << v[i][0] << "\t" << v[i][1] << "\t";
+      bl.hexdump(cout, false);
+      cout << std::endl;
+      ASSERT_EQ(bl.length(), v[i][1]);
+      uint32_t u;
+      auto p = bl.begin();
+      small_decode_varint(u, p);
+      ASSERT_EQ(v[i][0], u);
+    }
+    {
+      bufferlist bl;
+      small_encode_signed_varint(v[i][0], bl);
+      cout << std::hex << v[i][0] << "\t" << v[i][2] << "\t";
+      bl.hexdump(cout, false);
+      cout << std::endl;
+      ASSERT_EQ(bl.length(), v[i][2]);
+      int32_t u;
+      auto p = bl.begin();
+      small_decode_signed_varint(u, p);
+      ASSERT_EQ((int32_t)v[i][0], u);
+    }
+    {
+      bufferlist bl;
+      int64_t x = -(int64_t)v[i][0];
+      small_encode_signed_varint(x, bl);
+      cout << std::dec << x << std::hex << "\t" << v[i][3] << "\t";
+      bl.hexdump(cout, false);
+      cout << std::endl;
+      ASSERT_EQ(bl.length(), v[i][3]);
+      int64_t u;
+      auto p = bl.begin();
+      small_decode_signed_varint(u, p);
+      ASSERT_EQ(x, u);
+    }
+  }
+}
+
+TEST(small_encoding, varint_lowz) {
+  uint32_t v[][4] = {
+    /* value, bytes encoded */
+    {0, 1, 1, 1},
+    {1, 1, 1, 1},
+    {2, 1, 1, 1},
+    {15, 1, 1, 1},
+    {16, 1, 1, 1},
+    {31, 1, 2, 2},
+    {63, 2, 2, 2},
+    {64, 1, 1, 1},
+    {0xff, 2, 2, 2},
+    {0x100, 1, 1, 1},
+    {0x7ff, 2, 2, 2},
+    {0xfff, 2, 3, 3},
+    {0x1000, 1, 1, 1},
+    {0x4000, 1, 1, 1},
+    {0x8000, 1, 1, 1},
+    {0x10000, 1, 2, 2},
+    {0x20000, 2, 2, 2},
+    {0x40000, 2, 2, 2},
+    {0x80000, 2, 2, 2},
+    {0x7f0000, 2, 2, 2},
+    {0xffff0000, 4, 4, 4},
+    {0xffffffff, 5, 5, 5},
+    {0, 0, 0, 0}
+  };
+  for (unsigned i=0; v[i][1]; ++i) {
+    {
+      bufferlist bl;
+      small_encode_varint_lowz(v[i][0], bl);
+      cout << std::hex << v[i][0] << "\t" << v[i][1] << "\t";
+      bl.hexdump(cout, false);
+      cout << std::endl;
+      ASSERT_EQ(bl.length(), v[i][1]);
+      uint32_t u;
+      auto p = bl.begin();
+      small_decode_varint_lowz(u, p);
+      ASSERT_EQ(v[i][0], u);
+    }
+    {
+      bufferlist bl;
+      int64_t x = v[i][0];
+      small_encode_signed_varint_lowz(x, bl);
+      cout << std::hex << x << "\t" << v[i][2] << "\t";
+      bl.hexdump(cout, false);
+      cout << std::endl;
+      ASSERT_EQ(bl.length(), v[i][2]);
+      int64_t u;
+      auto p = bl.begin();
+      small_decode_signed_varint_lowz(u, p);
+      ASSERT_EQ(x, u);
+    }
+    {
+      bufferlist bl;
+      int64_t x = -(int64_t)v[i][0];
+      small_encode_signed_varint_lowz(x, bl);
+      cout << std::dec << x << "\t" << v[i][3] << "\t";
+      bl.hexdump(cout, false);
+      cout << std::endl;
+      ASSERT_EQ(bl.length(), v[i][3]);
+      int64_t u;
+      auto p = bl.begin();
+      small_decode_signed_varint_lowz(u, p);
+      ASSERT_EQ(x, u);
+    }
+  }
+}
+
+TEST(small_encoding, lba) {
+  uint64_t v[][2] = {
+    /* value, bytes encoded */
+    {0, 4},
+    {1, 4},
+    {0xff, 4},
+    {0x10000, 4},
+    {0x7f0000, 4},
+    {0xffff0000, 4},
+    {0x0fffffff, 4},
+    {0x1fffffff, 5},
+    {0xffffffff, 5},
+    {0x3fffffff000, 4},
+    {0x7fffffff000, 5},
+    {0x1fffffff0000, 4},
+    {0x3fffffff0000, 5},
+    {0xfffffff00000, 4},
+    {0x1fffffff00000, 5},
+    /* 6 or more low zero nibbles: only 20 zero bits may be dropped */
+    {0x1000000, 4},
+    {0x100000000, 4},
+    {0x1000000000000, 5},
+    {0, 0}
+  };
+  for (unsigned i=0; v[i][1]; ++i) {
+    bufferlist bl;
+    small_encode_lba(v[i][0], bl);
+    cout << std::hex << v[i][0] << "\t" << v[i][1] << "\t";
+    bl.hexdump(cout, false);
+    cout << std::endl;
+    ASSERT_EQ(bl.length(), v[i][1]);
+    uint64_t u;
+    auto p = bl.begin();
+    small_decode_lba(u, p);
+    ASSERT_EQ(v[i][0], u);
+  }
+
+}