From f1f8b9f93b8c64a17c430a66e73e1e47a58781c7 Mon Sep 17 00:00:00 2001
From: Ulrich Weigand
Date: Fri, 4 Sep 2020 15:42:41 +0200
Subject: [PATCH] include/encoding: Fix encode/decode of float types on
 big-endian systems

Currently, floating-point types use "raw" encoding, which means they are
simply copied as a byte stream.  As a result, if decoding happens on a
machine whose byte order differs from that of the source machine, the
returned value will be incorrect.

As one effect of this problem, a big-endian OSD node cannot join a
cluster where the MON node is little-endian (or vice versa), because the
OSDMap (incremental) structure contains floating-point values.  Due to
this conversion problem, the OSD node will crash with an assertion
failure as soon as it receives any OSDMap update from the MON.

Fix this by always encoding floating-point values in little-endian byte
order, just as is done for integers.

(Note that this still assumes the source and target machines use the
same floating-point format apart from byte order.  But given that nearly
all platforms these days use IEEE binary32/binary64 for float/double,
that seems a reasonable assumption.)

Fixes: https://tracker.ceph.com/issues/47302
Signed-off-by: Ulrich Weigand
---
 src/include/encoding.h | 35 +++++++++++++++++++++++++++++++----
 1 file changed, 31 insertions(+), 4 deletions(-)

diff --git a/src/include/encoding.h b/src/include/encoding.h
index 2b3e723da22..5036f8710a4 100644
--- a/src/include/encoding.h
+++ b/src/include/encoding.h
@@ -92,10 +92,6 @@ WRITE_RAW_ENCODER(ceph_le64)
 WRITE_RAW_ENCODER(ceph_le32)
 WRITE_RAW_ENCODER(ceph_le16)
 
-// FIXME: we need to choose some portable floating point encoding here
-WRITE_RAW_ENCODER(float)
-WRITE_RAW_ENCODER(double)
-
 inline void encode(const bool &v, bufferlist& bl) {
   __u8 vv = v;
   encode_raw(vv, bl);
@@ -129,6 +125,37 @@ WRITE_INTTYPE_ENCODER(int32_t, le32)
 WRITE_INTTYPE_ENCODER(uint16_t, le16)
 WRITE_INTTYPE_ENCODER(int16_t, le16)
 
+// -----------------------------------
+// float types
+//
+// NOTE: The following code assumes all supported platforms use IEEE binary32
+// as float and IEEE binary64 as double floating-point format.  The assumption
+// is verified by the assertions below.
+//
+// Under this assumption, we can use raw encoding of floating-point types
+// on little-endian machines, but we still need to perform a byte swap
+// on big-endian machines to ensure cross-architecture compatibility.
+// To achieve that, we reinterpret the values as integers first, which are
+// byte-swapped via the ceph_le types as above.  The extra conversions
+// are optimized away on little-endian machines by the compiler.
+#define WRITE_FLTTYPE_ENCODER(type, itype, etype)			\
+  static_assert(sizeof(type) == sizeof(itype));				\
+  static_assert(std::numeric_limits<type>::is_iec559,			\
+		"floating-point type not using IEEE754 format");	\
+  inline void encode(type v, ::ceph::bufferlist& bl, uint64_t features=0) { \
+    ceph_##etype e;							\
+    e = *reinterpret_cast<itype *>(&v);					\
+    ::ceph::encode_raw(e, bl);						\
+  }									\
+  inline void decode(type &v, ::ceph::bufferlist::const_iterator& p) {	\
+    ceph_##etype e;							\
+    ::ceph::decode_raw(e, p);						\
+    *reinterpret_cast<itype *>(&v) = e;					\
+  }
+
+WRITE_FLTTYPE_ENCODER(float, uint32_t, le32)
+WRITE_FLTTYPE_ENCODER(double, uint64_t, le64)
+
 // see denc.h for ENCODE_DUMP_PATH discussion and definition.
 #ifdef ENCODE_DUMP_PATH
 # define ENCODE_DUMP_PRE()			\
-- 
2.39.5
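
For context, here is a minimal standalone sketch (not part of the patch) of the
same technique the WRITE_FLTTYPE_ENCODER macro applies: reinterpret the IEEE754
value as a same-sized integer and emit it LSB-first, so the on-wire bytes are
little-endian regardless of host byte order.  The encode_double_le and
decode_double_le helper names are invented for this illustration only; the
patch itself routes the reinterpreted integer through Ceph's ceph_le types and
bufferlist.

// Standalone sketch only: encode/decode a double as 8 little-endian bytes,
// independent of host endianness (assumes IEEE binary64, as the patch does).
#include <array>
#include <cstdint>
#include <cstring>
#include <limits>

static_assert(std::numeric_limits<double>::is_iec559,
              "sketch assumes IEEE binary64 doubles");

std::array<uint8_t, 8> encode_double_le(double v) {
  uint64_t bits;
  std::memcpy(&bits, &v, sizeof(bits));             // reinterpret as integer
  std::array<uint8_t, 8> out;
  for (int i = 0; i < 8; ++i)
    out[i] = static_cast<uint8_t>(bits >> (8 * i)); // LSB first = little-endian
  return out;
}

double decode_double_le(const std::array<uint8_t, 8>& in) {
  uint64_t bits = 0;
  for (int i = 0; i < 8; ++i)
    bits |= static_cast<uint64_t>(in[i]) << (8 * i);
  double v;
  std::memcpy(&v, &bits, sizeof(v));                // back to the float type
  return v;
}

The sketch uses std::memcpy for the type pun to stay clear of strict-aliasing
concerns; the macro in the patch performs the same reinterpretation via
reinterpret_cast and lets the ceph_le32/ceph_le64 wrappers do the byte swap,
which, as the added comment notes, is optimized away on little-endian hosts.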