From 750a9483d169bdc10ab6865492b9e399e40bc20c Mon Sep 17 00:00:00 2001 From: Lucian Petrut Date: Fri, 18 Aug 2023 12:25:52 +0000 Subject: [PATCH] common: Windows Unicode CLI support MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Windows CLI arguments use either ANSI (main()) or UTF-16 (wmain()). Meanwhile, Ceph libraries expect UTF-8 and raise exceptions when trying to use Unicode CLI arguments or log Unicode output: rbd.exe create test_unicode_șțăâ --size=32M terminate called after throwing an instance of 'std::runtime_error' what(): invalid utf8 We'll use a Windows application manifest, setting the "activeCodePage" property [1][2]. This enables the Windows UCRT UTF-8 mode so that functions that receive char* arguments will expect UTF-8 instead of ANSI, including main(). One exception is CreateProcess, which will need the UTF-16 form (CreateProcessW). Despite the locale being set to UTF-8, we'll have to explicitly set the console output to UTF-8 using SetConsoleOutputCP(CP_UTF8). In order to use the UTF-8 locale, we'll have to switch the mingw-llvm runtime from msvcrt to ucrt. This also fixes ceph-dokan crashes that currently occur when non-ANSI paths are logged. 
[1] https://learn.microsoft.com/en-us/windows/win32/sbscs/application-manifests#activecodepage [2] https://learn.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page Signed-off-by: Lucian Petrut --- mingw_conf.sh | 4 ++-- src/common/win32/SubProcess.cc | 8 +++++--- src/common/win32/code_page.manifest | 8 ++++++++ src/common/win32/code_page.rc | 2 ++ src/dokan/CMakeLists.txt | 3 ++- src/dokan/ceph_dokan.cc | 2 ++ src/tools/CMakeLists.txt | 3 +++ src/tools/rados/rados.cc | 3 +++ src/tools/rbd/CMakeLists.txt | 3 +++ src/tools/rbd/rbd.cc | 3 +++ src/tools/rbd_wnbd/CMakeLists.txt | 5 ++++- src/tools/rbd_wnbd/rbd_wnbd.cc | 13 ++++++++----- win32_deps_build.sh | 4 ++-- 13 files changed, 47 insertions(+), 14 deletions(-) create mode 100644 src/common/win32/code_page.manifest create mode 100644 src/common/win32/code_page.rc diff --git a/mingw_conf.sh b/mingw_conf.sh index 6a226da5f04..a03eb95dda3 100644 --- a/mingw_conf.sh +++ b/mingw_conf.sh @@ -129,8 +129,8 @@ EOL if [[ -n $USE_MINGW_LLVM ]]; then cat >> $MINGW_CMAKE_FILE <(cmdline.str().c_str()), + if (!CreateProcessW( + NULL, const_cast(cmdline_w.c_str()), NULL, NULL, /* No special security attributes */ 1, /* Inherit handles marked as inheritable */ 0, /* No special flags */ diff --git a/src/common/win32/code_page.manifest b/src/common/win32/code_page.manifest new file mode 100644 index 00000000000..dab929e1515 --- /dev/null +++ b/src/common/win32/code_page.manifest @@ -0,0 +1,8 @@ + + + + + UTF-8 + + + diff --git a/src/common/win32/code_page.rc b/src/common/win32/code_page.rc new file mode 100644 index 00000000000..12258c4bd61 --- /dev/null +++ b/src/common/win32/code_page.rc @@ -0,0 +1,2 @@ +#include +CREATEPROCESS_MANIFEST_RESOURCE_ID RT_MANIFEST "code_page.manifest" diff --git a/src/dokan/CMakeLists.txt b/src/dokan/CMakeLists.txt index cc05a0f29f6..2a61d38bb8d 100644 --- a/src/dokan/CMakeLists.txt +++ b/src/dokan/CMakeLists.txt @@ -2,7 +2,8 @@ set(ceph_dokan_srcs ceph_dokan.cc dbg.cc utils.cc - 
options.cc) + options.cc + ../common/win32/code_page.rc) add_executable(ceph-dokan ${ceph_dokan_srcs}) target_link_libraries(ceph-dokan ${DOKAN_LIBRARIES} ${GSSAPI_LIBRARIES} diff --git a/src/dokan/ceph_dokan.cc b/src/dokan/ceph_dokan.cc index 9e115222cab..1ea82e07423 100644 --- a/src/dokan/ceph_dokan.cc +++ b/src/dokan/ceph_dokan.cc @@ -1043,6 +1043,8 @@ boost::intrusive_ptr do_global_init( int main(int argc, const char** argv) { + SetConsoleOutputCP(CP_UTF8); + if (!SetConsoleCtrlHandler((PHANDLER_ROUTINE)ConsoleHandler, TRUE)) { cerr << "Couldn't initialize console event handler." << std::endl; return -EINVAL; diff --git a/src/tools/CMakeLists.txt b/src/tools/CMakeLists.txt index d1133798c5c..993fadb2e2e 100644 --- a/src/tools/CMakeLists.txt +++ b/src/tools/CMakeLists.txt @@ -6,6 +6,9 @@ set(rados_srcs ${PROJECT_SOURCE_DIR}/src/common/util.cc ${PROJECT_SOURCE_DIR}/src/common/obj_bencher.cc ${PROJECT_SOURCE_DIR}/src/osd/ECUtil.cc) +if(WIN32) + list(APPEND rados_srcs ../common/win32/code_page.rc) +endif() add_executable(rados ${rados_srcs}) target_link_libraries(rados librados global ${BLKID_LIBRARIES} ${CMAKE_DL_LIBS}) diff --git a/src/tools/rados/rados.cc b/src/tools/rados/rados.cc index 3d602e749cd..b8cf5e4d1dc 100644 --- a/src/tools/rados/rados.cc +++ b/src/tools/rados/rados.cc @@ -4047,6 +4047,9 @@ static int rados_tool_common(const std::map < std::string, std::string > &opts, int main(int argc, const char **argv) { + #ifdef _WIN32 + SetConsoleOutputCP(CP_UTF8); + #endif auto args = argv_to_vec(argc, argv); if (args.empty()) { cerr << argv[0] << ": -h or --help for usage" << std::endl; diff --git a/src/tools/rbd/CMakeLists.txt b/src/tools/rbd/CMakeLists.txt index 19b4e806a75..dac1d8babf9 100644 --- a/src/tools/rbd/CMakeLists.txt +++ b/src/tools/rbd/CMakeLists.txt @@ -55,6 +55,9 @@ set(rbd_srcs action/Ubbd.cc action/Watch.cc action/Wnbd.cc) +if(WIN32) + list(APPEND rbd_srcs ../../common/win32/code_page.rc) +endif() add_executable(rbd ${rbd_srcs} $) diff --git 
a/src/tools/rbd/rbd.cc b/src/tools/rbd/rbd.cc index a8c59d57577..bdeded4a05b 100644 --- a/src/tools/rbd/rbd.cc +++ b/src/tools/rbd/rbd.cc @@ -5,6 +5,9 @@ int main(int argc, const char **argv) { + #ifdef _WIN32 + SetConsoleOutputCP(CP_UTF8); + #endif rbd::Shell shell; return shell.execute(argc, argv); } diff --git a/src/tools/rbd_wnbd/CMakeLists.txt b/src/tools/rbd_wnbd/CMakeLists.txt index 86c41b2eeb6..ff09cd80a15 100644 --- a/src/tools/rbd_wnbd/CMakeLists.txt +++ b/src/tools/rbd_wnbd/CMakeLists.txt @@ -1,4 +1,7 @@ -add_executable(rbd-wnbd rbd_wnbd.cc wnbd_handler.cc wnbd_wmi.cc) +add_executable( + rbd-wnbd + rbd_wnbd.cc wnbd_handler.cc wnbd_wmi.cc + ../../common/win32/code_page.rc) set_target_properties( rbd-wnbd PROPERTIES COMPILE_FLAGS "-fpermissive -I${WNBD_INCLUDE_DIRS}") diff --git a/src/tools/rbd_wnbd/rbd_wnbd.cc b/src/tools/rbd_wnbd/rbd_wnbd.cc index d2df88cabb4..1946e83ff96 100644 --- a/src/tools/rbd_wnbd/rbd_wnbd.cc +++ b/src/tools/rbd_wnbd/rbd_wnbd.cc @@ -331,7 +331,7 @@ int send_map_request(std::string arguments) { // which will allow it to communicate the mapping status int map_device_using_suprocess(std::string arguments, int timeout_ms) { - STARTUPINFO si; + STARTUPINFOW si; PROCESS_INFORMATION pi; char ch; DWORD err = 0, status = 0; @@ -407,11 +407,12 @@ int map_device_using_suprocess(std::string arguments, int timeout_ms) dout(5) << __func__ << ": command line: " << command_line.str() << dendl; - GetStartupInfo(&si); + GetStartupInfoW(&si); // Create a detached child - if (!CreateProcess(NULL, (char*)command_line.str().c_str(), - NULL, NULL, FALSE, DETACHED_PROCESS, - NULL, NULL, &si, &pi)) { + if (!CreateProcessW( + NULL, const_cast(to_wstring(command_line.str()).c_str()), + NULL, NULL, FALSE, DETACHED_PROCESS, + NULL, NULL, &si, &pi)) { err = GetLastError(); derr << "CreateProcess failed: " << win32_strerror(err) << dendl; exit_code = -ECHILD; @@ -1904,6 +1905,8 @@ int main(int argc, const char *argv[]) 
SetConsoleCtrlHandler(console_handler_routine, true); // Avoid the Windows Error Reporting dialog. SetErrorMode(GetErrorMode() | SEM_NOGPFAULTERRORBOX); + SetConsoleOutputCP(CP_UTF8); + int r = rbd_wnbd(argc, argv); if (r < 0) { return r; diff --git a/win32_deps_build.sh b/win32_deps_build.sh index 6eea81d1b8a..c61cd7cb78a 100755 --- a/win32_deps_build.sh +++ b/win32_deps_build.sh @@ -40,8 +40,8 @@ dokanTag="v2.0.5.1000" dokanSrcDir="${depsSrcDir}/dokany" dokanLibDir="${depsToolsetDir}/dokany/lib" -mingwLlvmUrl="https://github.com/mstorsjo/llvm-mingw/releases/download/20230320/llvm-mingw-20230320-msvcrt-ubuntu-18.04-x86_64.tar.xz" -mingwLlvmSha256Sum="bc97745e702fb9e8f2a16f7d09dd5061ceeef16554dd12e542f619ce937e8d7a" +mingwLlvmUrl="https://github.com/mstorsjo/llvm-mingw/releases/download/20230320/llvm-mingw-20230320-ucrt-ubuntu-18.04-x86_64.tar.xz" +mingwLlvmSha256Sum="bc367753dea829d219be32e2e64e2d15d03158ce8e700ae5210ca3d78e6a07ea" mingwLlvmDir="${DEPS_DIR}/mingw-llvm" function _make() { -- 2.39.5