json_spirit: use utf8 internally when parsing \uHHHH

author Tim Serong <tserong@suse.com>

Fri, 1 May 2015 15:59:53 +0000 (01:59 +1000)

committer Nathan Cutler <ncutler@suse.cz>

Fri, 15 May 2015 10:24:45 +0000 (12:24 +0200)
author Tim Serong <tserong@suse.com>
Fri, 1 May 2015 15:59:53 +0000 (01:59 +1000)
committer Nathan Cutler <ncutler@suse.cz>
Fri, 15 May 2015 10:24:45 +0000 (12:24 +0200)
diff --git a/src/json_spirit/json_spirit_reader_template.h b/src/json_spirit/json_spirit_reader_template.h

index f87b59331b73999c984b9b3d9226ad985e37200d..2eaf743efae4c15d53de4620a66d0f33907dc463 100644 (file)
--- a/src/json_spirit/json_spirit_reader_template.h
+++ b/src/json_spirit/json_spirit_reader_template.h
@@ -13,6 +13,8 @@
  #include "json_spirit_value.h"\r
  #include "json_spirit_error_position.h"\r
  \r
+#include "common/utf8.h"\r
+\r
  #define BOOST_SPIRIT_THREADSAFE  // uncomment for multithreaded use, requires linking to boost.thread\r
  \r
  #include <boost/bind.hpp>\r
@@ -71,18 +73,30 @@ namespace json_spirit
          return ( hex_to_num( c1 ) << 4 ) + hex_to_num( c2 );\r
      }       \r
  \r
-    template< class Char_type, class Iter_type >\r
-    Char_type unicode_str_to_char( Iter_type& begin )\r
+    template< class String_type, class Iter_type >\r
+    String_type unicode_str_to_utf8( Iter_type& begin );\r
+\r
+    template<>\r
+    std::string unicode_str_to_utf8( std::string::const_iterator & begin )\r
      {\r
+        typedef typename std::string::value_type Char_type;\r
+\r
          const Char_type c1( *( ++begin ) );\r
          const Char_type c2( *( ++begin ) );\r
          const Char_type c3( *( ++begin ) );\r
          const Char_type c4( *( ++begin ) );\r
  \r
-        return ( hex_to_num( c1 ) << 12 ) + \r
-               ( hex_to_num( c2 ) <<  8 ) + \r
-               ( hex_to_num( c3 ) <<  4 ) + \r
-               hex_to_num( c4 );\r
+        unsigned long uc = ( hex_to_num( c1 ) << 12 ) + \r
+                           ( hex_to_num( c2 ) <<  8 ) + \r
+                           ( hex_to_num( c3 ) <<  4 ) + \r
+                           hex_to_num( c4 );\r
+\r
+        unsigned char buf[7];  // MAX_UTF8_SZ is 6 (see src/common/utf8.c)\r
+        int r = encode_utf8(uc, buf);\r
+        if (r >= 0) {\r
+            return std::string(reinterpret_cast<char *>(buf), r);\r
+        }\r
+        return std::string("_");\r
      }\r
  \r
      template< class String_type >\r
@@ -116,7 +130,7 @@ namespace json_spirit
              {\r
                  if( end - begin >= 5 )  //  expecting "uHHHH..."\r
                  {\r
-                    s += unicode_str_to_char< Char_type >( begin );  \r
+                    s += unicode_str_to_utf8< String_type >( begin );\r
                  }\r
                  break;\r
              }\r
@@ -178,11 +192,15 @@ namespace json_spirit
          return get_str_< std::string >( begin, end );\r
      }\r
  \r
+// Need this guard else it tries to instantiate unicode_str_to_utf8 with a\r
+// std::wstring, which isn't presently implemented\r
+#if defined( JSON_SPIRIT_WMVALUE_ENABLED ) && !defined( BOOST_NO_STD_WSTRING )\r
      inline std::wstring get_str( std::wstring::const_iterator begin, std::wstring::const_iterator end )\r
      {\r
          return get_str_< std::wstring >( begin, end );\r
      }\r
-    \r
+#endif\r
+\r
      template< class String_type, class Iter_type >\r
      String_type get_str( Iter_type begin, Iter_type end )\r
      {\r
diff --git a/src/test/mon/osd-pool-create.sh b/src/test/mon/osd-pool-create.sh

index 428bfe06defb357e226f161600b6390375301e5c..8a57856cd61f58be1bde4c0ded0640d24f34d65e 100755 (executable)
--- a/src/test/mon/osd-pool-create.sh
+++ b/src/test/mon/osd-pool-create.sh
@@ -236,6 +236,21 @@ function TEST_no_pool_delete() {
      ./ceph osd pool delete foo foo --yes-i-really-really-mean-it
  }
  
+function TEST_utf8_cli() {
+    local dir=$1
+    run_mon $dir a --public-addr $CEPH_MON
+    # Hopefully it's safe to include literal UTF-8 characters to test
+    # the fix for http://tracker.ceph.com/issues/7387.  If it turns out
+    # to not be OK (when is the default encoding *not* UTF-8?), maybe
+    # the character '黄' can be replaced with the escape $'\xe9\xbb\x84'
+    ./ceph osd pool create 黄 1024 2>&1 | \
+        grep "pool '黄' created" || return 1
+    ./ceph osd lspools 2>&1 | \
+        grep "黄" || return 1
+    ./ceph -f json-pretty osd dump | \
+        python -c "import json; import sys; json.load(sys.stdin)" || return 1
+    ./ceph osd pool delete 黄 黄 --yes-i-really-really-mean-it
+}
  
  main osd-pool-create
author	Tim Serong <tserong@suse.com>
	Fri, 1 May 2015 15:59:53 +0000 (01:59 +1000)
committer	Nathan Cutler <ncutler@suse.cz>
	Fri, 15 May 2015 10:24:45 +0000 (12:24 +0200)
src/json_spirit/json_spirit_reader_template.h		patch \| blob \| history
src/test/mon/osd-pool-create.sh		patch \| blob \| history