]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
common: add streaming interfaces for json/xml escaping
author Casey Bodley <cbodley@redhat.com>
Fri, 5 Jan 2018 17:36:18 +0000 (12:36 -0500)
committer Casey Bodley <cbodley@redhat.com>
Fri, 5 Jan 2018 18:26:25 +0000 (13:26 -0500)
adds stream output operators that escape json/xml strings without having
to allocate a separate output buffer

Signed-off-by: Casey Bodley <cbodley@redhat.com>
src/CMakeLists.txt
src/common/escape.c [deleted file]
src/common/escape.cc [new file with mode: 0644]
src/common/escape.h
src/test/escape.cc

index 3c5d70867b17906ca6b4e23e2a49d59936dd2d7e..4d74c4db9c53fc47ab8bc049a68f1a4b2bc51010 100644 (file)
@@ -425,7 +425,7 @@ set(libcommon_files
   common/bloom_filter.cc
   common/Readahead.cc
   common/cmdparse.cc
-  common/escape.c
+  common/escape.cc
   common/url_escape.cc
   common/io_priority.cc
   common/Clock.cc
diff --git a/src/common/escape.c b/src/common/escape.c
deleted file mode 100644 (file)
index 0d318e5..0000000
+++ /dev/null
@@ -1,198 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2011 New Dream Network
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation.  See file COPYING.
- *
- */
-
-#include "common/escape.h"
-
-#include <stdio.h>
-#include <string.h>
-
-/*
- * Some functions for escaping RGW responses
- */
-
-/* Static string length */
-#define SSTRL(x) ((sizeof(x)/sizeof(x[0])) - 1)
-
-#define LESS_THAN_XESCAPE              "&lt;"
-#define AMPERSAND_XESCAPE              "&amp;"
-#define GREATER_THAN_XESCAPE           "&gt;"
-#define SGL_QUOTE_XESCAPE              "&apos;"
-#define DBL_QUOTE_XESCAPE              "&quot;"
-
-int escape_xml_attr_len(const char *buf)
-{
-       const char *b;
-       int ret = 0;
-       for (b = buf; *b; ++b) {
-               unsigned char c = *b;
-               switch (c) {
-               case '<':
-                       ret += SSTRL(LESS_THAN_XESCAPE);
-                       break;
-               case '&':
-                       ret += SSTRL(AMPERSAND_XESCAPE);
-                       break;
-               case '>':
-                       ret += SSTRL(GREATER_THAN_XESCAPE);
-                       break;
-               case '\'':
-                       ret += SSTRL(SGL_QUOTE_XESCAPE);
-                       break;
-               case '"':
-                       ret += SSTRL(DBL_QUOTE_XESCAPE);
-                       break;
-               default:
-                       // Escape control characters.
-                       if (((c < 0x20) && (c != 0x09) && (c != 0x0a)) ||
-                                   (c == 0x7f)) {
-                               ret += 6;
-                       }
-                       else {
-                               ret++;
-                       }
-               }
-       }
-       // leave room for null terminator
-       ret++;
-       return ret;
-}
-
-void escape_xml_attr(const char *buf, char *out)
-{
-       char *o = out;
-       const char *b;
-       for (b = buf; *b; ++b) {
-               unsigned char c = *b;
-               switch (c) {
-               case '<':
-                       memcpy(o, LESS_THAN_XESCAPE, SSTRL(LESS_THAN_XESCAPE));
-                       o += SSTRL(LESS_THAN_XESCAPE);
-                       break;
-               case '&':
-                       memcpy(o, AMPERSAND_XESCAPE, SSTRL(AMPERSAND_XESCAPE));
-                       o += SSTRL(AMPERSAND_XESCAPE);
-                       break;
-               case '>':
-                       memcpy(o, GREATER_THAN_XESCAPE, SSTRL(GREATER_THAN_XESCAPE));
-                       o += SSTRL(GREATER_THAN_XESCAPE);
-                       break;
-               case '\'':
-                       memcpy(o, SGL_QUOTE_XESCAPE, SSTRL(SGL_QUOTE_XESCAPE));
-                       o += SSTRL(SGL_QUOTE_XESCAPE);
-                       break;
-               case '"':
-                       memcpy(o, DBL_QUOTE_XESCAPE, SSTRL(DBL_QUOTE_XESCAPE));
-                       o += SSTRL(DBL_QUOTE_XESCAPE);
-                       break;
-               default:
-                       // Escape control characters.
-                       if (((c < 0x20) && (c != 0x09) && (c != 0x0a)) ||
-                                   (c == 0x7f)) {
-                               snprintf(o, 7, "&#x%02x;", c);
-                               o += 6;
-                       }
-                       else {
-                               *o++ = c;
-                       }
-                       break;
-               }
-       }
-       // null terminator
-       *o = '\0';
-}
-
-#define DBL_QUOTE_JESCAPE "\\\""
-#define BACKSLASH_JESCAPE "\\\\"
-#define TAB_JESCAPE "\\t"
-#define NEWLINE_JESCAPE "\\n"
-
-int escape_json_attr_len(const char *buf, int src_len)
-{
-       const char *b;
-       int ret = 0;
-       int i;
-       for (i = 0, b = buf; i < src_len; ++i, ++b) {
-               unsigned char c = *b;
-               switch (c) {
-               case '"':
-                       ret += SSTRL(DBL_QUOTE_JESCAPE);
-                       break;
-               case '\\':
-                       ret += SSTRL(BACKSLASH_JESCAPE);
-                       break;
-               case '\t':
-                       ret += SSTRL(TAB_JESCAPE);
-                       break;
-               case '\n':
-                       ret += SSTRL(NEWLINE_JESCAPE);
-                       break;
-               default:
-                       // Escape control characters.
-                       if ((c < 0x20) || (c == 0x7f)) {
-                               ret += 6;
-                       }
-                       else {
-                               ret++;
-                       }
-               }
-       }
-       // leave room for null terminator
-       ret++;
-       return ret;
-}
-
-void escape_json_attr(const char *buf, int src_len, char *out)
-{
-       char *o = out;
-       const char *b;
-       int i;
-       for (i = 0, b = buf; i < src_len; ++i, ++b) {
-               unsigned char c = *b;
-               switch (c) {
-               case '"':
-                       // cppcheck-suppress invalidFunctionArg
-                       memcpy(o, DBL_QUOTE_JESCAPE, SSTRL(DBL_QUOTE_JESCAPE));
-                       o += SSTRL(DBL_QUOTE_JESCAPE);
-                       break;
-               case '\\':
-                       // cppcheck-suppress invalidFunctionArg
-                       memcpy(o, BACKSLASH_JESCAPE, SSTRL(BACKSLASH_JESCAPE));
-                       o += SSTRL(BACKSLASH_JESCAPE);
-                       break;
-               case '\t':
-                       // cppcheck-suppress invalidFunctionArg
-                       memcpy(o, TAB_JESCAPE, SSTRL(TAB_JESCAPE));
-                       o += SSTRL(TAB_JESCAPE);
-                       break;
-               case '\n':
-                       // cppcheck-suppress invalidFunctionArg
-                       memcpy(o, NEWLINE_JESCAPE, SSTRL(NEWLINE_JESCAPE));
-                       o += SSTRL(NEWLINE_JESCAPE);
-                       break;
-               default:
-                       // Escape control characters.
-                       if ((c < 0x20) || (c == 0x7f)) {
-                               snprintf(o, 7, "\\u%04x", c);
-                               o += 6;
-                       }
-                       else {
-                               *o++ = c;
-                       }
-                       break;
-               }
-       }
-       // null terminator
-       *o = '\0';
-}
-
diff --git a/src/common/escape.cc b/src/common/escape.cc
new file mode 100644 (file)
index 0000000..78659b0
--- /dev/null
@@ -0,0 +1,287 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2011 New Dream Network
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include "common/escape.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <iomanip>
+#include <boost/optional.hpp>
+
+/*
+ * Some functions for escaping RGW responses
+ */
+
+/* Static string length */
+#define SSTRL(x) ((sizeof(x)/sizeof(x[0])) - 1)
+
+#define LESS_THAN_XESCAPE              "&lt;"
+#define AMPERSAND_XESCAPE              "&amp;"
+#define GREATER_THAN_XESCAPE           "&gt;"
+#define SGL_QUOTE_XESCAPE              "&apos;"
+#define DBL_QUOTE_XESCAPE              "&quot;"
+
+int escape_xml_attr_len(const char *buf) // Returns the buffer size in bytes, terminating NUL included, needed to XML-escape the NUL-terminated string 'buf'.
+{
+       const char *b;
+       int ret = 0;
+       for (b = buf; *b; ++b) { // scan stops at the first NUL byte
+               unsigned char c = *b; // unsigned, so bytes >= 0x80 never compare below 0x20
+               switch (c) {
+               case '<':
+                       ret += SSTRL(LESS_THAN_XESCAPE);
+                       break;
+               case '&':
+                       ret += SSTRL(AMPERSAND_XESCAPE);
+                       break;
+               case '>':
+                       ret += SSTRL(GREATER_THAN_XESCAPE);
+                       break;
+               case '\'':
+                       ret += SSTRL(SGL_QUOTE_XESCAPE);
+                       break;
+               case '"':
+                       ret += SSTRL(DBL_QUOTE_XESCAPE);
+                       break;
+               default:
+                       // Control characters other than tab (0x09) and newline (0x0a), plus DEL (0x7f), are counted as escaped.
+                       if (((c < 0x20) && (c != 0x09) && (c != 0x0a)) ||
+                                   (c == 0x7f)) {
+                               ret += 6; // length of a "&#xNN;" character reference
+                       }
+                       else {
+                               ret++; // every other byte is copied through unchanged
+                       }
+               }
+       }
+       // one more byte for the terminating NUL
+       ret++;
+       return ret;
+}
+
+void escape_xml_attr(const char *buf, char *out) // Writes the XML-escaped form of the NUL-terminated string 'buf' into 'out'; no bounds are checked, so 'out' must already be large enough.
+{
+       char *o = out; // write cursor into the output buffer
+       const char *b;
+       for (b = buf; *b; ++b) { // scan stops at the first NUL byte
+               unsigned char c = *b; // unsigned, so bytes >= 0x80 never compare below 0x20
+               switch (c) {
+               case '<':
+                       memcpy(o, LESS_THAN_XESCAPE, SSTRL(LESS_THAN_XESCAPE));
+                       o += SSTRL(LESS_THAN_XESCAPE);
+                       break;
+               case '&':
+                       memcpy(o, AMPERSAND_XESCAPE, SSTRL(AMPERSAND_XESCAPE));
+                       o += SSTRL(AMPERSAND_XESCAPE);
+                       break;
+               case '>':
+                       memcpy(o, GREATER_THAN_XESCAPE, SSTRL(GREATER_THAN_XESCAPE));
+                       o += SSTRL(GREATER_THAN_XESCAPE);
+                       break;
+               case '\'':
+                       memcpy(o, SGL_QUOTE_XESCAPE, SSTRL(SGL_QUOTE_XESCAPE));
+                       o += SSTRL(SGL_QUOTE_XESCAPE);
+                       break;
+               case '"':
+                       memcpy(o, DBL_QUOTE_XESCAPE, SSTRL(DBL_QUOTE_XESCAPE));
+                       o += SSTRL(DBL_QUOTE_XESCAPE);
+                       break;
+               default:
+                       // Control characters other than tab (0x09) and newline (0x0a), plus DEL (0x7f), become a hex character reference.
+                       if (((c < 0x20) && (c != 0x09) && (c != 0x0a)) ||
+                                   (c == 0x7f)) {
+                               snprintf(o, 7, "&#x%02x;", c); // 6 characters plus snprintf's NUL, which the next write overwrites
+                               o += 6;
+                       }
+                       else {
+                               *o++ = c; // every other byte is copied through unchanged
+                       }
+                       break;
+               }
+       }
+       // terminate the output string
+       *o = '\0';
+}
+
+// Applies hex formatting with '0' fill to a stream on construction and restores the previous fill and flags on destruction.
+struct hex_formatter {
+  std::ostream& out; // stream whose formatting state is changed and later restored
+  const char old_fill; // fill character in effect before construction
+  const std::ostream::fmtflags old_flags; // format flags in effect before construction
+
+  hex_formatter(std::ostream& out)
+    : out(out),
+      old_fill(out.fill('0')), // fill() installs '0' and returns the previous fill character
+      old_flags(out.setf(out.hex, out.basefield)) // setf() switches the base to hex and returns the previous flags
+  {} // NOTE(review): only fill and basefield are overridden; showbase/uppercase/left adjustment already set on 'out' would still apply - confirm callers never set them
+  ~hex_formatter() {
+    out.fill(old_fill);
+    out.flags(old_flags); // puts back the complete saved flag set, not only basefield
+  }
+};
+
+std::ostream& operator<<(std::ostream& out, const xml_stream_escaper& e) // Writes e.str to 'out' with XML escaping applied byte by byte; returns 'out'.
+{
+  boost::optional<hex_formatter> fmt; // stays empty unless a control character is seen; when set, its destructor restores the stream on return
+
+  for (unsigned char c : e.str) { // unsigned, so bytes >= 0x80 never compare below 0x20
+    switch (c) {
+    case '<':
+      out << LESS_THAN_XESCAPE;
+      break;
+    case '&':
+      out << AMPERSAND_XESCAPE;
+      break;
+    case '>':
+      out << GREATER_THAN_XESCAPE;
+      break;
+    case '\'':
+      out << SGL_QUOTE_XESCAPE;
+      break;
+    case '"':
+      out << DBL_QUOTE_XESCAPE;
+      break;
+    default:
+      // Control characters other than tab (0x09) and newline (0x0a), plus DEL (0x7f), become a hex character reference.
+      if (((c < 0x20) && (c != 0x09) && (c != 0x0a)) || (c == 0x7f)) {
+        if (!fmt) {
+          fmt.emplace(out); // first control character: switch the stream to hex with '0' fill
+        }
+        out << "&#x" << std::setw(2) << static_cast<unsigned int>(c) << ';'; // cast makes the byte print as a number; setw(2) applies to that one value only
+      } else {
+        out << c; // every other byte is written through unchanged
+      }
+      break;
+    }
+  }
+  return out;
+}
+
+#define DBL_QUOTE_JESCAPE "\\\""
+#define BACKSLASH_JESCAPE "\\\\"
+#define TAB_JESCAPE "\\t"
+#define NEWLINE_JESCAPE "\\n"
+
+int escape_json_attr_len(const char *buf, int src_len) // Returns the buffer size in bytes, terminating NUL included, needed to JSON-escape the first 'src_len' bytes of 'buf'.
+{
+       const char *b;
+       int ret = 0;
+       int i;
+       for (i = 0, b = buf; i < src_len; ++i, ++b) { // bounded by src_len only, so a NUL byte is counted as a control character instead of ending the scan
+               unsigned char c = *b; // unsigned, so bytes >= 0x80 never compare below 0x20
+               switch (c) {
+               case '"':
+                       ret += SSTRL(DBL_QUOTE_JESCAPE);
+                       break;
+               case '\\':
+                       ret += SSTRL(BACKSLASH_JESCAPE);
+                       break;
+               case '\t':
+                       ret += SSTRL(TAB_JESCAPE);
+                       break;
+               case '\n':
+                       ret += SSTRL(NEWLINE_JESCAPE);
+                       break;
+               default:
+                       // Remaining control characters (< 0x20) and DEL (0x7f) are counted as escaped; tab and newline were handled above.
+                       if ((c < 0x20) || (c == 0x7f)) {
+                               ret += 6; // backslash, 'u' and four hex digits
+                       }
+                       else {
+                               ret++; // every other byte is copied through unchanged
+                       }
+               }
+       }
+       // one more byte for the terminating NUL
+       ret++;
+       return ret;
+}
+
+void escape_json_attr(const char *buf, int src_len, char *out) // Writes the JSON-escaped form of the first 'src_len' bytes of 'buf' into 'out'; no bounds are checked, so 'out' must already be large enough.
+{
+       char *o = out; // write cursor into the output buffer
+       const char *b;
+       int i;
+       for (i = 0, b = buf; i < src_len; ++i, ++b) { // bounded by src_len only, so a NUL byte is escaped instead of ending the scan
+               unsigned char c = *b; // unsigned, so bytes >= 0x80 never compare below 0x20
+               switch (c) {
+               case '"':
+                       // cppcheck-suppress invalidFunctionArg
+                       memcpy(o, DBL_QUOTE_JESCAPE, SSTRL(DBL_QUOTE_JESCAPE));
+                       o += SSTRL(DBL_QUOTE_JESCAPE);
+                       break;
+               case '\\':
+                       // cppcheck-suppress invalidFunctionArg
+                       memcpy(o, BACKSLASH_JESCAPE, SSTRL(BACKSLASH_JESCAPE));
+                       o += SSTRL(BACKSLASH_JESCAPE);
+                       break;
+               case '\t':
+                       // cppcheck-suppress invalidFunctionArg
+                       memcpy(o, TAB_JESCAPE, SSTRL(TAB_JESCAPE));
+                       o += SSTRL(TAB_JESCAPE);
+                       break;
+               case '\n':
+                       // cppcheck-suppress invalidFunctionArg
+                       memcpy(o, NEWLINE_JESCAPE, SSTRL(NEWLINE_JESCAPE));
+                       o += SSTRL(NEWLINE_JESCAPE);
+                       break;
+               default:
+                       // Remaining control characters (< 0x20) and DEL (0x7f) become a four-digit hex unicode escape.
+                       if ((c < 0x20) || (c == 0x7f)) {
+                               snprintf(o, 7, "\\u%04x", c); // 6 characters plus snprintf's NUL, which the next write overwrites
+                               o += 6;
+                       }
+                       else {
+                               *o++ = c; // every other byte is copied through unchanged
+                       }
+                       break;
+               }
+       }
+       // terminate the output string
+       *o = '\0';
+}
+
+std::ostream& operator<<(std::ostream& out, const json_stream_escaper& e) // Writes e.str to 'out' with JSON escaping applied byte by byte; returns 'out'.
+{
+  boost::optional<hex_formatter> fmt; // stays empty unless a control character is seen; when set, its destructor restores the stream on return
+
+  for (unsigned char c : e.str) { // unsigned, so bytes >= 0x80 never compare below 0x20
+    switch (c) {
+    case '"':
+      out << DBL_QUOTE_JESCAPE;
+      break;
+    case '\\':
+      out << BACKSLASH_JESCAPE;
+      break;
+    case '\t':
+      out << TAB_JESCAPE;
+      break;
+    case '\n':
+      out << NEWLINE_JESCAPE;
+      break;
+    default:
+      // Remaining control characters (< 0x20) and DEL (0x7f) become a four-digit hex unicode escape.
+      if ((c < 0x20) || (c == 0x7f)) {
+        if (!fmt) {
+          fmt.emplace(out); // first control character: switch the stream to hex with '0' fill
+        }
+        out << "\\u" << std::setw(4) << static_cast<unsigned int>(c); // cast makes the byte print as a number; setw(4) applies to that one value only
+      } else {
+        out << c; // every other byte is written through unchanged
+      }
+      break;
+    }
+  }
+  return out;
+}
index ebdf16fd1bf26bb799d25452aa984cd50521d53b..277a208174c6364a18689a21040e4f7c4dbde081 100644 (file)
@@ -15,9 +15,8 @@
 #ifndef CEPH_RGW_ESCAPE_H
 #define CEPH_RGW_ESCAPE_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include <ostream>
+#include <boost/utility/string_view.hpp>
 
 /* Returns the length of a buffer that would be needed to escape 'buf'
  * as an XML attribute
@@ -45,8 +44,21 @@ void escape_json_attr(const char *buf, int src_len, char *out);
  * require this, Amazon does it in their XML responses.
  */
 
-#ifdef __cplusplus
-}
-#endif
+// Stream output operators that write escaped text straight to the stream, without building an escaped copy first.
+// usage:
+//   std::string xml_input = ...;
+//   std::cout << xml_stream_escaper(xml_input) << std::endl;
+
+struct xml_stream_escaper { // wraps a string so that streaming it with operator<< emits XML-escaped text
+  boost::string_view str; // non-owning view: the referenced characters must stay alive until the wrapper has been streamed
+  xml_stream_escaper(boost::string_view str) : str(str) {} // not explicit, so anything convertible to boost::string_view is accepted
+};
+std::ostream& operator<<(std::ostream& out, const xml_stream_escaper& e); // writes e.str to 'out' XML-escaped and returns 'out'
+
+struct json_stream_escaper { // wraps a string so that streaming it with operator<< emits JSON-escaped text
+  boost::string_view str; // non-owning view: the referenced characters must stay alive until the wrapper has been streamed
+  json_stream_escaper(boost::string_view str) : str(str) {} // not explicit, so anything convertible to boost::string_view is accepted
+};
+std::ostream& operator<<(std::ostream& out, const json_stream_escaper& e); // writes e.str to 'out' JSON-escaped and returns 'out'
 
 #endif
index ac0efdafaed0f56b808c05372d612ed0ed92e728..82591cba1e199402059bd1ee3c7d978881e84bf9 100644 (file)
@@ -22,31 +22,48 @@ static std::string escape_xml_attrs(const char *str)
   escape_xml_attr(str, out);
   return out;
 }
+static std::string escape_xml_stream(const char *str) // Test helper: returns what xml_stream_escaper writes for 'str'.
+{
+  std::stringstream ss; // collects the streamed output so it can be compared as a string
+  ss << xml_stream_escaper(str);
+  return ss.str();
+}
 
 TEST(EscapeXml, PassThrough) { // text without special characters, including the empty string, must come back unchanged from both escapers
   ASSERT_EQ(escape_xml_attrs("simplicity itself"), "simplicity itself");
+  ASSERT_EQ(escape_xml_stream("simplicity itself"), "simplicity itself");
   ASSERT_EQ(escape_xml_attrs(""), "");
+  ASSERT_EQ(escape_xml_stream(""), "");
   ASSERT_EQ(escape_xml_attrs("simple examples please!"), "simple examples please!");
+  ASSERT_EQ(escape_xml_stream("simple examples please!"), "simple examples please!");
 }
 
 TEST(EscapeXml, EntityRefs1) { // quotes and '<' must be replaced by entity references, identically in the buffer and stream escapers
   ASSERT_EQ(escape_xml_attrs("The \"scare quotes\""), "The &quot;scare quotes&quot;");
+  ASSERT_EQ(escape_xml_stream("The \"scare quotes\""), "The &quot;scare quotes&quot;");
   ASSERT_EQ(escape_xml_attrs("I <3 XML"), "I &lt;3 XML");
+  ASSERT_EQ(escape_xml_stream("I <3 XML"), "I &lt;3 XML");
   ASSERT_EQ(escape_xml_attrs("Some 'single' \"quotes\" here"),
            "Some &apos;single&apos; &quot;quotes&quot; here");
+  ASSERT_EQ(escape_xml_stream("Some 'single' \"quotes\" here"),
+           "Some &apos;single&apos; &quot;quotes&quot; here");
 }
 
 TEST(EscapeXml, ControlChars) { // control bytes and DEL must appear as two-digit lowercase hex character references
   ASSERT_EQ(escape_xml_attrs("\x01\x02\x03"), "&#x01;&#x02;&#x03;");
+  ASSERT_EQ(escape_xml_stream("\x01\x02\x03"), "&#x01;&#x02;&#x03;");
 
   ASSERT_EQ(escape_xml_attrs("abc\x7f"), "abc&#x7f;");
+  ASSERT_EQ(escape_xml_stream("abc\x7f"), "abc&#x7f;");
 }
 
 TEST(EscapeXml, Utf8) { // bytes >= 0x80 and the trailing newline must pass through untouched; only '<' and '>' are escaped
   const char *cc1 = "\xe6\xb1\x89\xe5\xad\x97\n";
   ASSERT_EQ(escape_xml_attrs(cc1), cc1);
+  ASSERT_EQ(escape_xml_stream(cc1), cc1);
 
   ASSERT_EQ(escape_xml_attrs("<\xe6\xb1\x89\xe5\xad\x97>\n"), "&lt;\xe6\xb1\x89\xe5\xad\x97&gt;\n");
+  ASSERT_EQ(escape_xml_stream("<\xe6\xb1\x89\xe5\xad\x97>\n"), "&lt;\xe6\xb1\x89\xe5\xad\x97&gt;\n");
 }
 
 static std::string escape_json_attrs(const char *str)
@@ -57,29 +74,48 @@ static std::string escape_json_attrs(const char *str)
   escape_json_attr(str, src_len, out);
   return out;
 }
+static std::string escape_json_stream(const char *str) // Test helper: returns what json_stream_escaper writes for 'str'.
+{
+  std::stringstream ss; // collects the streamed output so it can be compared as a string
+  ss << json_stream_escaper(str);
+  return ss.str();
+}
 
 TEST(EscapeJson, PassThrough) { // text without special characters, including the empty string, must come back unchanged from both escapers
   ASSERT_EQ(escape_json_attrs("simplicity itself"), "simplicity itself");
+  ASSERT_EQ(escape_json_stream("simplicity itself"), "simplicity itself");
   ASSERT_EQ(escape_json_attrs(""), "");
+  ASSERT_EQ(escape_json_stream(""), "");
   ASSERT_EQ(escape_json_attrs("simple examples please!"), "simple examples please!");
+  ASSERT_EQ(escape_json_stream("simple examples please!"), "simple examples please!");
 }
 
 TEST(EscapeJson, Escapes1) { // double quotes, tabs and newlines get backslash escapes; '<' and single quotes are left alone
   ASSERT_EQ(escape_json_attrs("The \"scare quotes\""),
                              "The \\\"scare quotes\\\"");
+  ASSERT_EQ(escape_json_stream("The \"scare quotes\""),
+                             "The \\\"scare quotes\\\"");
   ASSERT_EQ(escape_json_attrs("I <3 JSON"), "I <3 JSON");
+  ASSERT_EQ(escape_json_stream("I <3 JSON"), "I <3 JSON");
   ASSERT_EQ(escape_json_attrs("Some 'single' \"quotes\" here"),
       "Some 'single' \\\"quotes\\\" here");
+  ASSERT_EQ(escape_json_stream("Some 'single' \"quotes\" here"),
+      "Some 'single' \\\"quotes\\\" here");
   ASSERT_EQ(escape_json_attrs("tabs\tand\tnewlines\n, oh my"),
       "tabs\\tand\\tnewlines\\n, oh my");
+  ASSERT_EQ(escape_json_stream("tabs\tand\tnewlines\n, oh my"),
+      "tabs\\tand\\tnewlines\\n, oh my");
 }
 
 TEST(EscapeJson, ControlChars) { // control bytes and DEL must appear as four-digit lowercase hex unicode escapes
   ASSERT_EQ(escape_json_attrs("\x01\x02\x03"), "\\u0001\\u0002\\u0003");
+  ASSERT_EQ(escape_json_stream("\x01\x02\x03"), "\\u0001\\u0002\\u0003");
 
   ASSERT_EQ(escape_json_attrs("abc\x7f"), "abc\\u007f");
+  ASSERT_EQ(escape_json_stream("abc\x7f"), "abc\\u007f");
 }
 
 TEST(EscapeJson, Utf8) { // bytes >= 0x80 must pass through untouched while the trailing newline is still escaped
   EXPECT_EQ(escape_json_attrs("\xe6\xb1\x89\xe5\xad\x97\n"), "\xe6\xb1\x89\xe5\xad\x97\\n");
+  EXPECT_EQ(escape_json_stream("\xe6\xb1\x89\xe5\xad\x97\n"), "\xe6\xb1\x89\xe5\xad\x97\\n");
 }