From 688cc25755ed946394976585298099c1a754134c Mon Sep 17 00:00:00 2001 From: Casey Bodley Date: Fri, 5 Jan 2018 12:36:18 -0500 Subject: [PATCH] common: add streaming interfaces for json/xml escaping adds stream output operators that escape json/xml strings without having to allocate a separate output buffer Signed-off-by: Casey Bodley --- src/CMakeLists.txt | 2 +- src/common/{escape.c => escape.cc} | 89 ++++++++++++++++++++++++++++++ src/common/escape.h | 24 ++++++-- src/test/escape.cc | 36 ++++++++++++ 4 files changed, 144 insertions(+), 7 deletions(-) rename src/common/{escape.c => escape.cc} (67%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3c5d70867b1..4d74c4db9c5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -425,7 +425,7 @@ set(libcommon_files common/bloom_filter.cc common/Readahead.cc common/cmdparse.cc - common/escape.c + common/escape.cc common/url_escape.cc common/io_priority.cc common/Clock.cc diff --git a/src/common/escape.c b/src/common/escape.cc similarity index 67% rename from src/common/escape.c rename to src/common/escape.cc index 0d318e5f430..78659b0bfb2 100644 --- a/src/common/escape.c +++ b/src/common/escape.cc @@ -16,6 +16,8 @@ #include #include +#include +#include /* * Some functions for escaping RGW responses @@ -112,6 +114,60 @@ void escape_xml_attr(const char *buf, char *out) *o = '\0'; } +// applies hex formatting on construction, restores on destruction +struct hex_formatter { + std::ostream& out; + const char old_fill; + const std::ostream::fmtflags old_flags; + + hex_formatter(std::ostream& out) + : out(out), + old_fill(out.fill('0')), + old_flags(out.setf(out.hex, out.basefield)) + {} + ~hex_formatter() { + out.fill(old_fill); + out.flags(old_flags); + } +}; + +std::ostream& operator<<(std::ostream& out, const xml_stream_escaper& e) +{ + boost::optional fmt; + + for (unsigned char c : e.str) { + switch (c) { + case '<': + out << LESS_THAN_XESCAPE; + break; + case '&': + out << AMPERSAND_XESCAPE; + break; + case '>': + out << GREATER_THAN_XESCAPE; + break; + case '\'': + out << SGL_QUOTE_XESCAPE; + break; + case '"': + out << DBL_QUOTE_XESCAPE; + break; + default: + // Escape control characters. + if (((c < 0x20) && (c != 0x09) && (c != 0x0a)) || (c == 0x7f)) { + if (!fmt) { + fmt.emplace(out); // enable hex formatting + } + out << "&#x" << std::setw(2) << static_cast(c) << ';'; + } else { + out << c; + } + break; + } + } + return out; +} + #define DBL_QUOTE_JESCAPE "\\\"" #define BACKSLASH_JESCAPE "\\\\" #define TAB_JESCAPE "\\t" @@ -196,3 +252,36 @@ void escape_json_attr(const char *buf, int src_len, char *out) *o = '\0'; } +std::ostream& operator<<(std::ostream& out, const json_stream_escaper& e) +{ + boost::optional fmt; + + for (unsigned char c : e.str) { + switch (c) { + case '"': + out << DBL_QUOTE_JESCAPE; + break; + case '\\': + out << BACKSLASH_JESCAPE; + break; + case '\t': + out << TAB_JESCAPE; + break; + case '\n': + out << NEWLINE_JESCAPE; + break; + default: + // Escape control characters. + if ((c < 0x20) || (c == 0x7f)) { + if (!fmt) { + fmt.emplace(out); // enable hex formatting + } + out << "\\u" << std::setw(4) << static_cast(c); + } else { + out << c; + } + break; + } + } + return out; +} diff --git a/src/common/escape.h b/src/common/escape.h index ebdf16fd1bf..277a208174c 100644 --- a/src/common/escape.h +++ b/src/common/escape.h @@ -15,9 +15,8 @@ #ifndef CEPH_RGW_ESCAPE_H #define CEPH_RGW_ESCAPE_H -#ifdef __cplusplus -extern "C" { -#endif +#include +#include /* Returns the length of a buffer that would be needed to escape 'buf' * as an XML attrribute @@ -45,8 +44,21 @@ void escape_json_attr(const char *buf, int src_len, char *out); * require this, Amazon does it in their XML responses. */ -#ifdef __cplusplus -} -#endif +// stream output operators that write escaped text without making a copy +// usage: +// std::string xml_input = ...; +// std::cout << xml_stream_escaper(xml_input) << std::endl; + +struct xml_stream_escaper { + boost::string_view str; + xml_stream_escaper(boost::string_view str) : str(str) {} +}; +std::ostream& operator<<(std::ostream& out, const xml_stream_escaper& e); + +struct json_stream_escaper { + boost::string_view str; + json_stream_escaper(boost::string_view str) : str(str) {} +}; +std::ostream& operator<<(std::ostream& out, const json_stream_escaper& e); #endif diff --git a/src/test/escape.cc b/src/test/escape.cc index ac0efdafaed..82591cba1e1 100644 --- a/src/test/escape.cc +++ b/src/test/escape.cc @@ -22,31 +22,48 @@ static std::string escape_xml_attrs(const char *str) escape_xml_attr(str, out); return out; } +static std::string escape_xml_stream(const char *str) +{ + std::stringstream ss; + ss << xml_stream_escaper(str); + return ss.str(); +} TEST(EscapeXml, PassThrough) { ASSERT_EQ(escape_xml_attrs("simplicity itself"), "simplicity itself"); + ASSERT_EQ(escape_xml_stream("simplicity itself"), "simplicity itself"); ASSERT_EQ(escape_xml_attrs(""), ""); + ASSERT_EQ(escape_xml_stream(""), ""); ASSERT_EQ(escape_xml_attrs("simple examples please!"), "simple examples please!"); + ASSERT_EQ(escape_xml_stream("simple examples please!"), "simple examples please!"); } TEST(EscapeXml, EntityRefs1) { ASSERT_EQ(escape_xml_attrs("The \"scare quotes\""), "The "scare quotes""); + ASSERT_EQ(escape_xml_stream("The \"scare quotes\""), "The "scare quotes""); ASSERT_EQ(escape_xml_attrs("I <3 XML"), "I <3 XML"); + ASSERT_EQ(escape_xml_stream("I <3 XML"), "I <3 XML"); ASSERT_EQ(escape_xml_attrs("Some 'single' \"quotes\" here"), "Some 'single' "quotes" here"); + ASSERT_EQ(escape_xml_stream("Some 'single' \"quotes\" here"), + "Some 'single' "quotes" here"); } TEST(EscapeXml, ControlChars) { ASSERT_EQ(escape_xml_attrs("\x01\x02\x03"), ""); + ASSERT_EQ(escape_xml_stream("\x01\x02\x03"), ""); ASSERT_EQ(escape_xml_attrs("abc\x7f"), "abc"); + ASSERT_EQ(escape_xml_stream("abc\x7f"), "abc"); } TEST(EscapeXml, Utf8) { const char *cc1 = "\xe6\xb1\x89\xe5\xad\x97\n"; ASSERT_EQ(escape_xml_attrs(cc1), cc1); + ASSERT_EQ(escape_xml_stream(cc1), cc1); ASSERT_EQ(escape_xml_attrs("<\xe6\xb1\x89\xe5\xad\x97>\n"), "<\xe6\xb1\x89\xe5\xad\x97>\n"); + ASSERT_EQ(escape_xml_stream("<\xe6\xb1\x89\xe5\xad\x97>\n"), "<\xe6\xb1\x89\xe5\xad\x97>\n"); } static std::string escape_json_attrs(const char *str) @@ -57,29 +74,48 @@ static std::string escape_json_attrs(const char *str) escape_json_attr(str, src_len, out); return out; } +static std::string escape_json_stream(const char *str) +{ + std::stringstream ss; + ss << json_stream_escaper(str); + return ss.str(); +} TEST(EscapeJson, PassThrough) { ASSERT_EQ(escape_json_attrs("simplicity itself"), "simplicity itself"); + ASSERT_EQ(escape_json_stream("simplicity itself"), "simplicity itself"); ASSERT_EQ(escape_json_attrs(""), ""); + ASSERT_EQ(escape_json_stream(""), ""); ASSERT_EQ(escape_json_attrs("simple examples please!"), "simple examples please!"); + ASSERT_EQ(escape_json_stream("simple examples please!"), "simple examples please!"); } TEST(EscapeJson, Escapes1) { ASSERT_EQ(escape_json_attrs("The \"scare quotes\""), "The \\\"scare quotes\\\""); + ASSERT_EQ(escape_json_stream("The \"scare quotes\""), + "The \\\"scare quotes\\\""); ASSERT_EQ(escape_json_attrs("I <3 JSON"), "I <3 JSON"); + ASSERT_EQ(escape_json_stream("I <3 JSON"), "I <3 JSON"); ASSERT_EQ(escape_json_attrs("Some 'single' \"quotes\" here"), "Some 'single' \\\"quotes\\\" here"); + ASSERT_EQ(escape_json_stream("Some 'single' \"quotes\" here"), + "Some 'single' \\\"quotes\\\" here"); ASSERT_EQ(escape_json_attrs("tabs\tand\tnewlines\n, oh my"), "tabs\\tand\\tnewlines\\n, oh my"); + ASSERT_EQ(escape_json_stream("tabs\tand\tnewlines\n, oh my"), + "tabs\\tand\\tnewlines\\n, oh my"); } TEST(EscapeJson, ControlChars) { ASSERT_EQ(escape_json_attrs("\x01\x02\x03"), "\\u0001\\u0002\\u0003"); + ASSERT_EQ(escape_json_stream("\x01\x02\x03"), "\\u0001\\u0002\\u0003"); ASSERT_EQ(escape_json_attrs("abc\x7f"), "abc\\u007f"); + ASSERT_EQ(escape_json_stream("abc\x7f"), "abc\\u007f"); } TEST(EscapeJson, Utf8) { EXPECT_EQ(escape_json_attrs("\xe6\xb1\x89\xe5\xad\x97\n"), "\xe6\xb1\x89\xe5\xad\x97\\n"); + EXPECT_EQ(escape_json_stream("\xe6\xb1\x89\xe5\xad\x97\n"), "\xe6\xb1\x89\xe5\xad\x97\\n"); } -- 2.39.5