From: Casey Bodley Date: Fri, 5 Jan 2018 17:36:18 +0000 (-0500) Subject: common: add streaming interfaces for json/xml escaping X-Git-Tag: v13.0.2~642^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F19806%2Fhead;p=ceph.git common: add streaming interfaces for json/xml escaping adds stream output operators that escape json/xml strings without having to allocate a separate output buffer Signed-off-by: Casey Bodley --- diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3c5d70867b17..4d74c4db9c53 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -425,7 +425,7 @@ set(libcommon_files common/bloom_filter.cc common/Readahead.cc common/cmdparse.cc - common/escape.c + common/escape.cc common/url_escape.cc common/io_priority.cc common/Clock.cc diff --git a/src/common/escape.c b/src/common/escape.c deleted file mode 100644 index 0d318e5f4306..000000000000 --- a/src/common/escape.c +++ /dev/null @@ -1,198 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2011 New Dream Network - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
- * - */ - -#include "common/escape.h" - -#include -#include - -/* - * Some functions for escaping RGW responses - */ - -/* Static string length */ -#define SSTRL(x) ((sizeof(x)/sizeof(x[0])) - 1) - -#define LESS_THAN_XESCAPE "<" -#define AMPERSAND_XESCAPE "&" -#define GREATER_THAN_XESCAPE ">" -#define SGL_QUOTE_XESCAPE "'" -#define DBL_QUOTE_XESCAPE """ - -int escape_xml_attr_len(const char *buf) -{ - const char *b; - int ret = 0; - for (b = buf; *b; ++b) { - unsigned char c = *b; - switch (c) { - case '<': - ret += SSTRL(LESS_THAN_XESCAPE); - break; - case '&': - ret += SSTRL(AMPERSAND_XESCAPE); - break; - case '>': - ret += SSTRL(GREATER_THAN_XESCAPE); - break; - case '\'': - ret += SSTRL(SGL_QUOTE_XESCAPE); - break; - case '"': - ret += SSTRL(DBL_QUOTE_XESCAPE); - break; - default: - // Escape control characters. - if (((c < 0x20) && (c != 0x09) && (c != 0x0a)) || - (c == 0x7f)) { - ret += 6; - } - else { - ret++; - } - } - } - // leave room for null terminator - ret++; - return ret; -} - -void escape_xml_attr(const char *buf, char *out) -{ - char *o = out; - const char *b; - for (b = buf; *b; ++b) { - unsigned char c = *b; - switch (c) { - case '<': - memcpy(o, LESS_THAN_XESCAPE, SSTRL(LESS_THAN_XESCAPE)); - o += SSTRL(LESS_THAN_XESCAPE); - break; - case '&': - memcpy(o, AMPERSAND_XESCAPE, SSTRL(AMPERSAND_XESCAPE)); - o += SSTRL(AMPERSAND_XESCAPE); - break; - case '>': - memcpy(o, GREATER_THAN_XESCAPE, SSTRL(GREATER_THAN_XESCAPE)); - o += SSTRL(GREATER_THAN_XESCAPE); - break; - case '\'': - memcpy(o, SGL_QUOTE_XESCAPE, SSTRL(SGL_QUOTE_XESCAPE)); - o += SSTRL(SGL_QUOTE_XESCAPE); - break; - case '"': - memcpy(o, DBL_QUOTE_XESCAPE, SSTRL(DBL_QUOTE_XESCAPE)); - o += SSTRL(DBL_QUOTE_XESCAPE); - break; - default: - // Escape control characters. 
- if (((c < 0x20) && (c != 0x09) && (c != 0x0a)) || - (c == 0x7f)) { - snprintf(o, 7, "&#x%02x;", c); - o += 6; - } - else { - *o++ = c; - } - break; - } - } - // null terminator - *o = '\0'; -} - -#define DBL_QUOTE_JESCAPE "\\\"" -#define BACKSLASH_JESCAPE "\\\\" -#define TAB_JESCAPE "\\t" -#define NEWLINE_JESCAPE "\\n" - -int escape_json_attr_len(const char *buf, int src_len) -{ - const char *b; - int ret = 0; - int i; - for (i = 0, b = buf; i < src_len; ++i, ++b) { - unsigned char c = *b; - switch (c) { - case '"': - ret += SSTRL(DBL_QUOTE_JESCAPE); - break; - case '\\': - ret += SSTRL(BACKSLASH_JESCAPE); - break; - case '\t': - ret += SSTRL(TAB_JESCAPE); - break; - case '\n': - ret += SSTRL(NEWLINE_JESCAPE); - break; - default: - // Escape control characters. - if ((c < 0x20) || (c == 0x7f)) { - ret += 6; - } - else { - ret++; - } - } - } - // leave room for null terminator - ret++; - return ret; -} - -void escape_json_attr(const char *buf, int src_len, char *out) -{ - char *o = out; - const char *b; - int i; - for (i = 0, b = buf; i < src_len; ++i, ++b) { - unsigned char c = *b; - switch (c) { - case '"': - // cppcheck-suppress invalidFunctionArg - memcpy(o, DBL_QUOTE_JESCAPE, SSTRL(DBL_QUOTE_JESCAPE)); - o += SSTRL(DBL_QUOTE_JESCAPE); - break; - case '\\': - // cppcheck-suppress invalidFunctionArg - memcpy(o, BACKSLASH_JESCAPE, SSTRL(BACKSLASH_JESCAPE)); - o += SSTRL(BACKSLASH_JESCAPE); - break; - case '\t': - // cppcheck-suppress invalidFunctionArg - memcpy(o, TAB_JESCAPE, SSTRL(TAB_JESCAPE)); - o += SSTRL(TAB_JESCAPE); - break; - case '\n': - // cppcheck-suppress invalidFunctionArg - memcpy(o, NEWLINE_JESCAPE, SSTRL(NEWLINE_JESCAPE)); - o += SSTRL(NEWLINE_JESCAPE); - break; - default: - // Escape control characters. 
- if ((c < 0x20) || (c == 0x7f)) { - snprintf(o, 7, "\\u%04x", c); - o += 6; - } - else { - *o++ = c; - } - break; - } - } - // null terminator - *o = '\0'; -} - diff --git a/src/common/escape.cc b/src/common/escape.cc new file mode 100644 index 000000000000..78659b0bfb29 --- /dev/null +++ b/src/common/escape.cc @@ -0,0 +1,287 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2011 New Dream Network + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "common/escape.h" + +#include +#include +#include +#include + +/* + * Some functions for escaping RGW responses + */ + +/* Static string length */ +#define SSTRL(x) ((sizeof(x)/sizeof(x[0])) - 1) + +#define LESS_THAN_XESCAPE "<" +#define AMPERSAND_XESCAPE "&" +#define GREATER_THAN_XESCAPE ">" +#define SGL_QUOTE_XESCAPE "'" +#define DBL_QUOTE_XESCAPE """ + +int escape_xml_attr_len(const char *buf) +{ + const char *b; + int ret = 0; + for (b = buf; *b; ++b) { + unsigned char c = *b; + switch (c) { + case '<': + ret += SSTRL(LESS_THAN_XESCAPE); + break; + case '&': + ret += SSTRL(AMPERSAND_XESCAPE); + break; + case '>': + ret += SSTRL(GREATER_THAN_XESCAPE); + break; + case '\'': + ret += SSTRL(SGL_QUOTE_XESCAPE); + break; + case '"': + ret += SSTRL(DBL_QUOTE_XESCAPE); + break; + default: + // Escape control characters. 
+ if (((c < 0x20) && (c != 0x09) && (c != 0x0a)) || + (c == 0x7f)) { + ret += 6; + } + else { + ret++; + } + } + } + // leave room for null terminator + ret++; + return ret; +} + +void escape_xml_attr(const char *buf, char *out) +{ + char *o = out; + const char *b; + for (b = buf; *b; ++b) { + unsigned char c = *b; + switch (c) { + case '<': + memcpy(o, LESS_THAN_XESCAPE, SSTRL(LESS_THAN_XESCAPE)); + o += SSTRL(LESS_THAN_XESCAPE); + break; + case '&': + memcpy(o, AMPERSAND_XESCAPE, SSTRL(AMPERSAND_XESCAPE)); + o += SSTRL(AMPERSAND_XESCAPE); + break; + case '>': + memcpy(o, GREATER_THAN_XESCAPE, SSTRL(GREATER_THAN_XESCAPE)); + o += SSTRL(GREATER_THAN_XESCAPE); + break; + case '\'': + memcpy(o, SGL_QUOTE_XESCAPE, SSTRL(SGL_QUOTE_XESCAPE)); + o += SSTRL(SGL_QUOTE_XESCAPE); + break; + case '"': + memcpy(o, DBL_QUOTE_XESCAPE, SSTRL(DBL_QUOTE_XESCAPE)); + o += SSTRL(DBL_QUOTE_XESCAPE); + break; + default: + // Escape control characters. + if (((c < 0x20) && (c != 0x09) && (c != 0x0a)) || + (c == 0x7f)) { + snprintf(o, 7, "&#x%02x;", c); + o += 6; + } + else { + *o++ = c; + } + break; + } + } + // null terminator + *o = '\0'; +} + +// applies hex formatting on construction, restores on destruction +struct hex_formatter { + std::ostream& out; + const char old_fill; + const std::ostream::fmtflags old_flags; + + hex_formatter(std::ostream& out) + : out(out), + old_fill(out.fill('0')), + old_flags(out.setf(out.hex, out.basefield)) + {} + ~hex_formatter() { + out.fill(old_fill); + out.flags(old_flags); + } +}; + +std::ostream& operator<<(std::ostream& out, const xml_stream_escaper& e) +{ + boost::optional fmt; + + for (unsigned char c : e.str) { + switch (c) { + case '<': + out << LESS_THAN_XESCAPE; + break; + case '&': + out << AMPERSAND_XESCAPE; + break; + case '>': + out << GREATER_THAN_XESCAPE; + break; + case '\'': + out << SGL_QUOTE_XESCAPE; + break; + case '"': + out << DBL_QUOTE_XESCAPE; + break; + default: + // Escape control characters. 
+ if (((c < 0x20) && (c != 0x09) && (c != 0x0a)) || (c == 0x7f)) { + if (!fmt) { + fmt.emplace(out); // enable hex formatting + } + out << "&#x" << std::setw(2) << static_cast(c) << ';'; + } else { + out << c; + } + break; + } + } + return out; +} + +#define DBL_QUOTE_JESCAPE "\\\"" +#define BACKSLASH_JESCAPE "\\\\" +#define TAB_JESCAPE "\\t" +#define NEWLINE_JESCAPE "\\n" + +int escape_json_attr_len(const char *buf, int src_len) +{ + const char *b; + int ret = 0; + int i; + for (i = 0, b = buf; i < src_len; ++i, ++b) { + unsigned char c = *b; + switch (c) { + case '"': + ret += SSTRL(DBL_QUOTE_JESCAPE); + break; + case '\\': + ret += SSTRL(BACKSLASH_JESCAPE); + break; + case '\t': + ret += SSTRL(TAB_JESCAPE); + break; + case '\n': + ret += SSTRL(NEWLINE_JESCAPE); + break; + default: + // Escape control characters. + if ((c < 0x20) || (c == 0x7f)) { + ret += 6; + } + else { + ret++; + } + } + } + // leave room for null terminator + ret++; + return ret; +} + +void escape_json_attr(const char *buf, int src_len, char *out) +{ + char *o = out; + const char *b; + int i; + for (i = 0, b = buf; i < src_len; ++i, ++b) { + unsigned char c = *b; + switch (c) { + case '"': + // cppcheck-suppress invalidFunctionArg + memcpy(o, DBL_QUOTE_JESCAPE, SSTRL(DBL_QUOTE_JESCAPE)); + o += SSTRL(DBL_QUOTE_JESCAPE); + break; + case '\\': + // cppcheck-suppress invalidFunctionArg + memcpy(o, BACKSLASH_JESCAPE, SSTRL(BACKSLASH_JESCAPE)); + o += SSTRL(BACKSLASH_JESCAPE); + break; + case '\t': + // cppcheck-suppress invalidFunctionArg + memcpy(o, TAB_JESCAPE, SSTRL(TAB_JESCAPE)); + o += SSTRL(TAB_JESCAPE); + break; + case '\n': + // cppcheck-suppress invalidFunctionArg + memcpy(o, NEWLINE_JESCAPE, SSTRL(NEWLINE_JESCAPE)); + o += SSTRL(NEWLINE_JESCAPE); + break; + default: + // Escape control characters. 
+ if ((c < 0x20) || (c == 0x7f)) { + snprintf(o, 7, "\\u%04x", c); + o += 6; + } + else { + *o++ = c; + } + break; + } + } + // null terminator + *o = '\0'; +} + +std::ostream& operator<<(std::ostream& out, const json_stream_escaper& e) +{ + boost::optional fmt; + + for (unsigned char c : e.str) { + switch (c) { + case '"': + out << DBL_QUOTE_JESCAPE; + break; + case '\\': + out << BACKSLASH_JESCAPE; + break; + case '\t': + out << TAB_JESCAPE; + break; + case '\n': + out << NEWLINE_JESCAPE; + break; + default: + // Escape control characters. + if ((c < 0x20) || (c == 0x7f)) { + if (!fmt) { + fmt.emplace(out); // enable hex formatting + } + out << "\\u" << std::setw(4) << static_cast(c); + } else { + out << c; + } + break; + } + } + return out; +} diff --git a/src/common/escape.h b/src/common/escape.h index ebdf16fd1bf2..277a208174c6 100644 --- a/src/common/escape.h +++ b/src/common/escape.h @@ -15,9 +15,8 @@ #ifndef CEPH_RGW_ESCAPE_H #define CEPH_RGW_ESCAPE_H -#ifdef __cplusplus -extern "C" { -#endif +#include +#include /* Returns the length of a buffer that would be needed to escape 'buf' * as an XML attrribute @@ -45,8 +44,21 @@ void escape_json_attr(const char *buf, int src_len, char *out); * require this, Amazon does it in their XML responses. 
*/ -#ifdef __cplusplus -} -#endif +// stream output operators that write escaped text without making a copy +// usage: +// std::string xml_input = ...; +// std::cout << xml_stream_escaper(xml_input) << std::endl; + +struct xml_stream_escaper { + boost::string_view str; + xml_stream_escaper(boost::string_view str) : str(str) {} +}; +std::ostream& operator<<(std::ostream& out, const xml_stream_escaper& e); + +struct json_stream_escaper { + boost::string_view str; + json_stream_escaper(boost::string_view str) : str(str) {} +}; +std::ostream& operator<<(std::ostream& out, const json_stream_escaper& e); #endif diff --git a/src/test/escape.cc b/src/test/escape.cc index ac0efdafaed0..82591cba1e19 100644 --- a/src/test/escape.cc +++ b/src/test/escape.cc @@ -22,31 +22,48 @@ static std::string escape_xml_attrs(const char *str) escape_xml_attr(str, out); return out; } +static std::string escape_xml_stream(const char *str) +{ + std::stringstream ss; + ss << xml_stream_escaper(str); + return ss.str(); +} TEST(EscapeXml, PassThrough) { ASSERT_EQ(escape_xml_attrs("simplicity itself"), "simplicity itself"); + ASSERT_EQ(escape_xml_stream("simplicity itself"), "simplicity itself"); ASSERT_EQ(escape_xml_attrs(""), ""); + ASSERT_EQ(escape_xml_stream(""), ""); ASSERT_EQ(escape_xml_attrs("simple examples please!"), "simple examples please!"); + ASSERT_EQ(escape_xml_stream("simple examples please!"), "simple examples please!"); } TEST(EscapeXml, EntityRefs1) { ASSERT_EQ(escape_xml_attrs("The \"scare quotes\""), "The &quot;scare quotes&quot;"); + ASSERT_EQ(escape_xml_stream("The \"scare quotes\""), "The &quot;scare quotes&quot;"); ASSERT_EQ(escape_xml_attrs("I <3 XML"), "I &lt;3 XML"); + ASSERT_EQ(escape_xml_stream("I <3 XML"), "I &lt;3 XML"); ASSERT_EQ(escape_xml_attrs("Some 'single' \"quotes\" here"), "Some &apos;single&apos; &quot;quotes&quot; here"); + ASSERT_EQ(escape_xml_stream("Some 'single' \"quotes\" here"), + "Some &apos;single&apos; &quot;quotes&quot; here"); } TEST(EscapeXml, ControlChars) { ASSERT_EQ(escape_xml_attrs("\x01\x02\x03"), "&#x01;&#x02;&#x03;"); +
ASSERT_EQ(escape_xml_stream("\x01\x02\x03"), "&#x01;&#x02;&#x03;"); ASSERT_EQ(escape_xml_attrs("abc\x7f"), "abc&#x7f;"); + ASSERT_EQ(escape_xml_stream("abc\x7f"), "abc&#x7f;"); } TEST(EscapeXml, Utf8) { const char *cc1 = "\xe6\xb1\x89\xe5\xad\x97\n"; ASSERT_EQ(escape_xml_attrs(cc1), cc1); + ASSERT_EQ(escape_xml_stream(cc1), cc1); ASSERT_EQ(escape_xml_attrs("<\xe6\xb1\x89\xe5\xad\x97>\n"), "&lt;\xe6\xb1\x89\xe5\xad\x97&gt;\n"); + ASSERT_EQ(escape_xml_stream("<\xe6\xb1\x89\xe5\xad\x97>\n"), "&lt;\xe6\xb1\x89\xe5\xad\x97&gt;\n"); } static std::string escape_json_attrs(const char *str) @@ -57,29 +74,48 @@ static std::string escape_json_attrs(const char *str) escape_json_attr(str, src_len, out); return out; } +static std::string escape_json_stream(const char *str) +{ + std::stringstream ss; + ss << json_stream_escaper(str); + return ss.str(); +} TEST(EscapeJson, PassThrough) { ASSERT_EQ(escape_json_attrs("simplicity itself"), "simplicity itself"); + ASSERT_EQ(escape_json_stream("simplicity itself"), "simplicity itself"); ASSERT_EQ(escape_json_attrs(""), ""); + ASSERT_EQ(escape_json_stream(""), ""); ASSERT_EQ(escape_json_attrs("simple examples please!"), "simple examples please!"); + ASSERT_EQ(escape_json_stream("simple examples please!"), "simple examples please!"); } TEST(EscapeJson, Escapes1) { ASSERT_EQ(escape_json_attrs("The \"scare quotes\""), "The \\\"scare quotes\\\""); + ASSERT_EQ(escape_json_stream("The \"scare quotes\""), + "The \\\"scare quotes\\\""); ASSERT_EQ(escape_json_attrs("I <3 JSON"), "I <3 JSON"); + ASSERT_EQ(escape_json_stream("I <3 JSON"), "I <3 JSON"); ASSERT_EQ(escape_json_attrs("Some 'single' \"quotes\" here"), "Some 'single' \\\"quotes\\\" here"); + ASSERT_EQ(escape_json_stream("Some 'single' \"quotes\" here"), + "Some 'single' \\\"quotes\\\" here"); ASSERT_EQ(escape_json_attrs("tabs\tand\tnewlines\n, oh my"), "tabs\\tand\\tnewlines\\n, oh my"); + ASSERT_EQ(escape_json_stream("tabs\tand\tnewlines\n, oh my"), + "tabs\\tand\\tnewlines\\n, oh my"); } TEST(EscapeJson, ControlChars) {
ASSERT_EQ(escape_json_attrs("\x01\x02\x03"), "\\u0001\\u0002\\u0003"); + ASSERT_EQ(escape_json_stream("\x01\x02\x03"), "\\u0001\\u0002\\u0003"); ASSERT_EQ(escape_json_attrs("abc\x7f"), "abc\\u007f"); + ASSERT_EQ(escape_json_stream("abc\x7f"), "abc\\u007f"); } TEST(EscapeJson, Utf8) { EXPECT_EQ(escape_json_attrs("\xe6\xb1\x89\xe5\xad\x97\n"), "\xe6\xb1\x89\xe5\xad\x97\\n"); + EXPECT_EQ(escape_json_stream("\xe6\xb1\x89\xe5\xad\x97\n"), "\xe6\xb1\x89\xe5\xad\x97\\n"); }