]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: escape XML attributes
author Colin Patrick McCabe <cmccabe@alumni.cmu.edu>
Fri, 25 Mar 2011 00:29:21 +0000 (17:29 -0700)
committer Colin Patrick McCabe <cmccabe@alumni.cmu.edu>
Fri, 25 Mar 2011 17:33:32 +0000 (10:33 -0700)
Signed-off-by: Colin McCabe <colin.mccabe@dreamhost.com>
src/Makefile.am
src/rgw/rgw_escape.c [new file with mode: 0644]
src/rgw/rgw_escape.h [new file with mode: 0644]
src/rgw/rgw_formats.cc
src/rgw/rgw_rest_os.cc
src/rgw/rgw_rest_s3.cc
src/test/rgw_escape.cc [new file with mode: 0644]

index 7e216ef3f31825e55dc7add6545548727bf60ffa..3b0ad569e5eb1633f02f0488f122fc6d8fde31a9 100644 (file)
@@ -300,7 +300,8 @@ libradosgw_a_SOURCES = \
        rgw/rgw_os.cc \
        rgw/rgw_os_auth.cc \
        rgw/rgw_formats.cc \
-       rgw/rgw_log.cc
+       rgw/rgw_log.cc \
+       rgw/rgw_escape.c
 
 libradosgw_a_CFLAGS = ${AM_CFLAGS}
 libradosgw_a_CXXFLAGS = ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
@@ -426,6 +427,12 @@ unittest_utf8_LDADD = libcommon.a ${UNITTEST_LDADD}
 unittest_utf8_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
 check_PROGRAMS += unittest_utf8
 
+unittest_rgw_escape_SOURCES = test/rgw_escape.cc rgw/rgw_escape.c
+unittest_rgw_escape_LDFLAGS = -pthread ${AM_LDFLAGS}
+unittest_rgw_escape_LDADD = libcommon.a ${UNITTEST_LDADD}
+unittest_rgw_escape_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
+check_PROGRAMS += unittest_rgw_escape
+
 # shell scripts
 editpaths = sed \
        -e 's|@bindir[@]|$(bindir)|g' \
@@ -1040,6 +1047,7 @@ noinst_HEADERS = \
        rgw/rgw_rest_os.h\
        rgw/rgw_rest_s3.h\
        rgw/rgw_user.h\
+       rgw/rgw_escape.h\
        sample.ceph.conf\
        tools/common.h\
        tools/gui.h\
diff --git a/src/rgw/rgw_escape.c b/src/rgw/rgw_escape.c
new file mode 100644 (file)
index 0000000..085123e
--- /dev/null
@@ -0,0 +1,113 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2011 New Dream Network
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include "rgw/rgw_escape.h"
+
+#include <stdio.h>
+#include <string.h>
+
+/*
+ * Some functions for escaping RGW responses
+ */
+
+/* Static string length: the number of characters in a string literal (or
+ * other char array), not counting the terminating null. Must not be used
+ * on a pointer, where sizeof would measure the pointer instead.
+ */
+#define SSTRL(x) ((sizeof(x)/sizeof((x)[0])) - 1)
+
+#define LESS_THAN_ESCAPE               "&lt;"
+#define AMPERSAND_ESCAPE               "&amp;"
+#define GREATER_THAN_ESCAPE            "&gt;"
+#define SGL_QUOTE_ESCAPE               "&apos;"
+#define DBL_QUOTE_ESCAPE               "&quot;"
+
+/* Length of a numeric character reference of the form "&#xNN;" */
+#define CONTROL_CHAR_ESCAPE_LEN        6
+
+/* Returns nonzero if the byte 'c' has to be written as a numeric character
+ * reference: every byte below 0x20 except tab (0x09) and newline (0x0a),
+ * plus DEL (0x7f).
+ *
+ * The parameter is deliberately an unsigned char. Where plain char is
+ * signed, bytes >= 0x80 (for example UTF-8 multibyte sequences) are
+ * negative and would otherwise compare as "< 0x20".
+ */
+static int is_escaped_control_char(unsigned char c)
+{
+       return ((c < 0x20) && (c != 0x09) && (c != 0x0a)) || (c == 0x7f);
+}
+
+/* Returns the size, in bytes, of the buffer needed to hold the escaped form
+ * of the null-terminated string 'buf', including the terminating null.
+ *
+ * This must stay in sync with escape_xml_attr(): every input byte has to be
+ * counted here with exactly the number of bytes that function writes for it.
+ */
+int escape_xml_attr_len(const char *buf)
+{
+       const char *b;
+       int ret = 0;
+       for (b = buf; *b; ++b) {
+               // Classify the byte by its unsigned value; see
+               // is_escaped_control_char().
+               unsigned char c = (unsigned char)*b;
+               switch (c) {
+               case '<':
+                       ret += SSTRL(LESS_THAN_ESCAPE);
+                       break;
+               case '&':
+                       ret += SSTRL(AMPERSAND_ESCAPE);
+                       break;
+               case '>':
+                       ret += SSTRL(GREATER_THAN_ESCAPE);
+                       break;
+               case '\'':
+                       ret += SSTRL(SGL_QUOTE_ESCAPE);
+                       break;
+               case '"':
+                       ret += SSTRL(DBL_QUOTE_ESCAPE);
+                       break;
+               default:
+                       // Control characters become "&#xNN;", everything
+                       // else is copied through as a single byte.
+                       if (is_escaped_control_char(c)) {
+                               ret += CONTROL_CHAR_ESCAPE_LEN;
+                       }
+                       else {
+                               ret++;
+                       }
+                       break;
+               }
+       }
+       // leave room for null terminator
+       ret++;
+       return ret;
+}
+
+/* Writes the escaped form of the null-terminated string 'buf' to 'out' and
+ * null-terminates it.
+ *
+ * The five XML special characters < & > ' " are replaced by their named
+ * entities, control characters are replaced by "&#xNN;", and all other
+ * bytes are copied unchanged. 'out' must provide at least
+ * escape_xml_attr_len(buf) bytes; no bounds checking is done here.
+ */
+void escape_xml_attr(const char *buf, char *out)
+{
+       char *o = out;
+       const char *b;
+       for (b = buf; *b; ++b) {
+               // Same unsigned classification as escape_xml_attr_len(), so
+               // the two functions always agree on the output size.
+               unsigned char c = (unsigned char)*b;
+               switch (c) {
+               case '<':
+                       memcpy(o, LESS_THAN_ESCAPE, SSTRL(LESS_THAN_ESCAPE));
+                       o += SSTRL(LESS_THAN_ESCAPE);
+                       break;
+               case '&':
+                       memcpy(o, AMPERSAND_ESCAPE, SSTRL(AMPERSAND_ESCAPE));
+                       o += SSTRL(AMPERSAND_ESCAPE);
+                       break;
+               case '>':
+                       memcpy(o, GREATER_THAN_ESCAPE, SSTRL(GREATER_THAN_ESCAPE));
+                       o += SSTRL(GREATER_THAN_ESCAPE);
+                       break;
+               case '\'':
+                       memcpy(o, SGL_QUOTE_ESCAPE, SSTRL(SGL_QUOTE_ESCAPE));
+                       o += SSTRL(SGL_QUOTE_ESCAPE);
+                       break;
+               case '"':
+                       memcpy(o, DBL_QUOTE_ESCAPE, SSTRL(DBL_QUOTE_ESCAPE));
+                       o += SSTRL(DBL_QUOTE_ESCAPE);
+                       break;
+               default:
+                       if (is_escaped_control_char(c)) {
+                               // snprintf writes the 6 characters plus a
+                               // null. That null lands where the next
+                               // output byte (or the final terminator)
+                               // goes, so it stays inside the buffer.
+                               snprintf(o, CONTROL_CHAR_ESCAPE_LEN + 1,
+                                        "&#x%02x;", (unsigned int)c);
+                               o += CONTROL_CHAR_ESCAPE_LEN;
+                       }
+                       else {
+                               *o++ = *b;
+                       }
+                       break;
+               }
+       }
+       // null terminator
+       *o = '\0';
+}
diff --git a/src/rgw/rgw_escape.h b/src/rgw/rgw_escape.h
new file mode 100644 (file)
index 0000000..7a227fe
--- /dev/null
@@ -0,0 +1,41 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2011 New Dream Network
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#ifndef CEPH_RGW_ESCAPE_H
+#define CEPH_RGW_ESCAPE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Returns the length of the buffer that would be needed to escape the
+ * null-terminated string 'buf' as an XML attribute. The returned length
+ * includes room for the terminating null.
+ */
+int escape_xml_attr_len(const char *buf);
+
+/* Escapes the null-terminated string 'buf' as an XML attribute, writing the
+ * null-terminated result to 'out'. Assumes that 'out' is at least long
+ * enough to fit the output; no bounds checking is performed. You can find
+ * out the required length by calling escape_xml_attr_len first.
+ */
+void escape_xml_attr(const char *buf, char *out);
+
+/* Note: we escape control characters (as numeric character references of
+ * the form "&#xNN;"). Although the XML spec doesn't actually require this,
+ * Amazon does it in their XML responses.
+ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
index 960c0a0e76445ec410df37ff1d8f3271ae9ac663..18e60ad9414ebf8d976fb14539a0876ebc2dea70 100644 (file)
@@ -1,3 +1,4 @@
+#include "rgw_escape.h"
 #include "rgw_common.h"
 #include "rgw_formats.h"
 
@@ -120,7 +121,10 @@ void RGWFormatter_XML::dump_value_str(const char *name, const char *fmt, ...)
   va_end(ap);
   if (n >= LARGE_SIZE)
     return;
-  write_data("<%s>%s</%s>", name, buf, name);
+  int len = escape_xml_attr_len(buf);
+  char escaped[len];
+  escape_xml_attr(buf, escaped);
+  write_data("<%s>%s</%s>", name, escaped, name);
 }
 
 /* JSON */
index 6692589b5f14e0392cad54719867ebbc25139fa3..988e72c75fa934318362868bf38d6dd2d1624332 100644 (file)
@@ -81,7 +81,7 @@ void RGWListBucket_REST_OS::send_response()
     if (do_objs && (marker.empty() || iter->name.compare(marker) > 0)) {
       s->formatter->open_obj_section("object");
       s->formatter->dump_value_str("name", iter->name.c_str());
-      s->formatter->dump_value_str("hash", "&quot;%s&quot;", iter->etag);
+      s->formatter->dump_value_str("hash", "\"%s\"", iter->etag);
       s->formatter->dump_value_int("bytes", "%lld", iter->size);
       if (iter->content_type.size())
         s->formatter->dump_value_str("content_type", iter->content_type.c_str());
index a6d5fab454a8519274e1adf3dc133d4a20553708..70e208f02e08f83c28e2b94da539dec83507a328 100644 (file)
@@ -133,7 +133,7 @@ void RGWListBucket_REST_S3::send_response()
       s->formatter->open_array_section("Contents");
       s->formatter->dump_value_str("Key", iter->name.c_str());
       dump_time(s, "LastModified", &iter->mtime);
-      s->formatter->dump_value_str("ETag", "&quot;%s&quot;", iter->etag);
+      s->formatter->dump_value_str("ETag", "\"%s\"", iter->etag);
       s->formatter->dump_value_int("Size", "%lld", iter->size);
       s->formatter->dump_value_str("StorageClass", "STANDARD");
       dump_owner(s, s->user.user_id, s->user.display_name);
diff --git a/src/test/rgw_escape.cc b/src/test/rgw_escape.cc
new file mode 100644 (file)
index 0000000..3015470
--- /dev/null
@@ -0,0 +1,45 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2011 New Dream Network
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+#include "rgw/rgw_escape.h"
+#include "gtest/gtest.h"
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+// Convenience overload for the tests: escapes 'str' with the C API and
+// returns the result as a std::string.
+std::string escape_xml_attr(const char *str)
+{
+  // escape_xml_attr_len() counts the null terminator, so the buffer always
+  // holds at least one byte. A std::vector replaces the variable-length
+  // array, which is a compiler extension and not standard C++.
+  std::vector<char> out(escape_xml_attr_len(str));
+  escape_xml_attr(str, &out[0]);
+  return std::string(&out[0]);
+}
+
+// Strings without any special characters (including the empty string) must
+// come back exactly as they went in.
+TEST(EscapeXml, PassThrough) {
+  static const char *const plain[] = {
+    "simplicity itself",
+    "",
+    "simple examples please!",
+  };
+  for (size_t i = 0; i < sizeof(plain) / sizeof(plain[0]); ++i) {
+    ASSERT_EQ(escape_xml_attr(plain[i]), plain[i]);
+  }
+}
+
+// The XML special characters must be replaced by their named entities.
+TEST(EscapeXml, EntityRefs1) {
+  const std::string dbl_quoted = escape_xml_attr("The \"scare quotes\"");
+  ASSERT_EQ(dbl_quoted, "The &quot;scare quotes&quot;");
+
+  const std::string less_than = escape_xml_attr("I <3 XML");
+  ASSERT_EQ(less_than, "I &lt;3 XML");
+
+  const std::string mixed_quotes =
+    escape_xml_attr("Some 'single' \"quotes\" here");
+  ASSERT_EQ(mixed_quotes, "Some &apos;single&apos; &quot;quotes&quot; here");
+}
+
+// Control characters must be written as two-digit hexadecimal character
+// references, while ordinary characters around them are left alone.
+TEST(EscapeXml, ControlChars) {
+  const uint8_t low_ctl[] = { 0x01, 0x02, 0x03, 0x0 };
+  const std::string low_escaped =
+    escape_xml_attr(reinterpret_cast<const char*>(low_ctl));
+  ASSERT_EQ(low_escaped, "&#x01;&#x02;&#x03;");
+
+  const uint8_t abc_del[] = { 0x61, 0x62, 0x63, 0x7f, 0x0 };
+  const std::string del_escaped =
+    escape_xml_attr(reinterpret_cast<const char*>(abc_del));
+  ASSERT_EQ(del_escaped, "abc&#x7f;");
+}